* Start from a clean session: no data in memory, no leftover macros, no -more- pauses
clear all
macro drop _all
set more off

*ssc install excelcol

********************************************************************************
*			 DIRECTORIES 
********************************************************************************

* Project root inside the user's Dropbox folder; the location depends on the operating system
if ("`c(os)'"=="Windows") global ROOT "C:/Users/`c(username)'/Dropbox/PhD Maria Ulugbek Mapping indiv poverty" //Windows
if ("`c(os)'"=="MacOSX") global ROOT "/Users/`c(username)'/Dropbox/PhD Maria Ulugbek Mapping indiv poverty" //Mac

* Fail fast on any other operating system: with an empty ROOT every path below
* would silently point to the filesystem root and the first -use- would fail
* with an unrelated "file not found" error.
if ("$ROOT"=="") {
	di as error "global ROOT is not defined for operating system `c(os)'; add the project folder for this system above"
	exit 198
}

* Input data
global origdata 	"$ROOT/2-data/1-raw"
global cultdata		"$ROOT/2-data/0-cultural data/Data"

* Replication package folders
global readydata  	"$ROOT/3-prog/CULTURE/replication/1_data"
global prog		 	"$ROOT/3-prog/CULTURE/replication/2_dofiles"
global shares		"$ROOT/3-prog/CULTURE/replication/3_shares"
global tables		"$ROOT/3-prog/CULTURE/replication/4_tables"
global figures		"$ROOT/3-prog/CULTURE/replication/5_figures"

********************************************************************************
*
*			DATA PREPARATION ** MALAWI ** 							
*
********************************************************************************

* Special codes that are recoded to missing (space-separated list and comma-separated list for inlist)
global missvals " 99999999 9999999 999999 99999 9999 999 777 888 999 7777 8888 9999 77777 88888 99999 777777 888888 999999 7777777 8888888 9999999 77777777 88888888 99999999 777777777 888888888 999999999"
global missvals_inlist "99999999, 9999999, 999999, 99999, 9999, 999,777,888,999,7777,8888,9999,77777,88888,99999,777777,888888,999999,7777777,8888888,9999999,77777777,88888888,99999999,777777777,888888888,999999999, 9999996"


* RECALL PERIODS
*******************
* Every conversion factor below is a text snippet that is appended to an expression.
* denom rescales the annualised amount: set it to 1 for yearly values, to 365 for daily values.
global denom "1"

* Hours (2080 working hours in a year)
global hour1 "*2080/$denom"


* Days: factor for an amount reported over a given number of days (each factor is echoed to the log)
forvalues d = 1/1000 {
	global day`d' "*(365/`d')/$denom"
	di "${day`d'}"
}

* Weeks and months share the same 1-100 range, so both are defined in one pass
forvalues n = 1/100 {
	global week`n' 	"*(365/(`n'*7))/$denom"
	global month`n'	"*(12/`n')/$denom"
}
global week05	"*(365/3.5)/$denom" //twice a week


* Years
forvalues y = 1/10 {
	global year`y'	"*1/(`y'*$denom)"
}


* Percentile cutoffs for winsorizing extreme values of expenditure items
global expoutcutoff "0 99.99"  

*------------------------------------------------------------------------------*
*
*					Malawi IHS 2016 (cross-sectional sample)
*
*------------------------------------------------------------------------------*
{
	********************************************************************************
	* Identifiers
	********************************************************************************
	* Household key and regional identifiers, exact duplicate rows removed
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_A_FILT.dta", clear
	keep case_id region district hh_a02a ea_id
	generate dataset_hhid1 = case_id //for combining
	generate hhid = case_id
	generate region2 = region
	decode region, generate(region2_name)
	generate region3 = district
	decode district, generate(region3_name)
	generate region4 = hh_a02a
	generate region5 = ea_id
	drop case_id region district hh_a02a ea_id
	duplicates drop
	tempfile hhidvars
	save `hhidvars'

	* Person key within the household, exact duplicate rows removed
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_B.dta", clear
	keep case_id PID
	generate dataset_hhid1 = case_id //for combining
	generate dataset_pno1 = PID //for combining
	generate pno = PID
	drop case_id PID
	duplicates drop
	tempfile pidvars
	save `pidvars'

	* Household sampling weight
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_A_FILT.dta", clear
	keep case_id hh_wgt
	generate dataset_hhid1 = case_id //for combining
	generate sweight = hh_wgt
	drop case_id hh_wgt
	tempfile weight
	save `weight'


	********************************************************************************
	* Household demographic characteristics
	********************************************************************************
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_B.dta", clear
	generate dataset_hhid1 = case_id //for combining
	generate dataset_pno1 = PID //for combining

	* Age in years; when years are missing or zero, fall back on reported months divided by 12
	generate p_agey = hh_b05a
	generate p_agem = hh_b05b
	replace p_agey = p_agem/12 if (missing(p_agey) | p_agey==0) & !missing(p_agem)

	* Indicators are left missing whenever the source variable is missing
	generate p_age40 = (p_agey>40) if !missing(p_agey)
	generate p_female = (hh_b03==2) if !missing(hh_b03)
	generate p_head = (hh_b04==1) if !missing(hh_b04)
	generate p_spouse = (hh_b04==2) if !missing(hh_b04)

	* Child/adult split: the age threshold is defined according to child clothing
	generate p_childage = 16
	generate p_child = (p_agey<=p_childage) if !missing(p_agey)
	generate p_adult = (p_agey>p_childage) if !missing(p_agey)
	generate p_adult18 = (p_agey>=18) if !missing(p_agey)
	* Older than the child threshold but still under 18
	generate p_adultchild = (p_agey>p_childage & p_agey<18) if !missing(p_agey)


	* Language spoken group (value label text, lower-cased)
	decode hh_b22, generate(p_language)
	replace p_language = lower(p_language)

	* Marital status; values . and 9 of hh_b24 leave the indicators missing
	generate p_married = inlist(hh_b24,1,2) if !inlist(hh_b24, .,9)
	generate p_single = (hh_b24==6) if !inlist(hh_b24, .,9)
	generate p_divorced = (hh_b24==4) if !inlist(hh_b24, .,9)
	generate p_widow = (hh_b24==5) if !inlist(hh_b24, .,9)
	generate p_separated = (hh_b24==3) if !inlist(hh_b24, .,9)

	* Absence: hh_b07 of 12 or more; values . and 99 leave the indicator missing
	generate p_absent = (hh_b07>=12) if !inlist(hh_b07, .,99)

	keep dataset_* p_*
	tempfile indchars1
	save `indchars1'

	* Education
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_C.dta", clear
	generate dataset_hhid1 = case_id //for combining
	generate dataset_pno1 = PID //for combining

	generate p_noeduc = (hh_c06==2) if !missing(hh_c06)
	generate p_educyrs = hh_c08
	generate p_eductert = inlist(hh_c09, 6,7) if !missing(hh_c09)

	* People flagged as having no education get zero years and no tertiary education when those are unreported
	replace p_educyrs = 0 if p_noeduc==1 & missing(p_educyrs)
	replace p_eductert = 0 if p_noeduc==1 & missing(p_eductert)

	* Then make the no-education flag agree with the years variable
	replace p_noeduc = 1 if p_educyrs==0
	replace p_noeduc = 0 if p_educyrs>0 & !missing(p_educyrs)

	keep dataset_* p_*
	duplicates drop
	tempfile indchars2
	save `indchars2'


	* Employment
	* Builds employment status indicators (last 7 days, last 12 months) and annualised wage income per person
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_E.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen dataset_pno1 = PID //for combining

	** employee: activity codes 1 and 5, first and second reported activity
	gen p_employee7d1 = inlist(hh_e13_1a, 1,5) if hh_e13_1a<.
	gen p_employee7d2 = inlist(hh_e13_1b, 1,5) if hh_e13_1b<.
	gen p_employee12m1 = inlist(hh_e06_8a, 1,5) if hh_e06_8a<.
	gen p_employee12m2 = inlist(hh_e06_8b, 1,5) if hh_e06_8b<.
	egen p_employee7d = rowmax(p_employee7d1 p_employee7d2)
	egen p_employee12m = rowmax(p_employee12m1 p_employee12m2)
	egen p_employee=rowmax(p_employee7d p_employee12m)
	drop p_employee7d1 p_employee7d2
	drop p_employee12m1 p_employee12m2

	** self employment: activity codes 2 and 3
	* The 7-day lines used to repeat the employee codes 1,5, which made 7-day self employment
	* identical to 7-day wage employment. They now use the same codes 2,3 as the 12-month lines.
	* NOTE(review): assumes hh_e13_1a/b share the code list of hh_e06_8a/b - confirm against the questionnaire
	gen p_selfempl7d1 = inlist(hh_e13_1a, 2,3) if hh_e13_1a<.
	gen p_selfempl7d2 = inlist(hh_e13_1b, 2,3) if hh_e13_1b<.
	gen p_selfempl12m1 = inlist(hh_e06_8a, 2,3) if hh_e06_8a<.
	gen p_selfempl12m2 = inlist(hh_e06_8b, 2,3) if hh_e06_8b<.
	egen p_selfempl7d = rowmax(p_selfempl7d1 p_selfempl7d2)
	egen p_selfempl12m = rowmax(p_selfempl12m1 p_selfempl12m2)
	egen p_selfempl=rowmax(p_selfempl7d p_selfempl12m)
	drop p_selfempl7d1 p_selfempl7d2
	drop p_selfempl12m1 p_selfempl12m2

	** working population: employee or self employed
	egen p_working12m = rowmax(p_employee12m p_selfempl12m)
	egen p_working7d = rowmax(p_employee7d p_selfempl7d)
	egen p_working = rowmax(p_employee p_selfempl)


	* Wage
	* Recode special codes to missing before any arithmetic
	destring hh_e40a hh_e41, replace
	foreach var in hh_e25 hh_e26a hh_e27 hh_e28a hh_e39 hh_e40a hh_e41 hh_e42a hh_e56 hh_e57 hh_e58 hh_e59 {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}

	* Four payment items: amount divided by the number of time units it covers,
	* then scaled with the day1, week1 or month1 factor when the unit code is 3, 4 or 5.
	* Other unit codes are left unscaled.
	gen p_wage12m1 = hh_e25/hh_e26a
	replace p_wage12m1 = p_wage12m1$day1 if hh_e26b ==3
	replace p_wage12m1 = p_wage12m1$week1 if hh_e26b ==4
	replace p_wage12m1 = p_wage12m1$month1 if hh_e26b ==5

	gen p_wage12m2 = hh_e27/hh_e28a
	replace p_wage12m2 = p_wage12m2$day1 if hh_e28b ==3
	replace p_wage12m2 = p_wage12m2$week1 if hh_e28b ==4
	replace p_wage12m2 = p_wage12m2$month1 if hh_e28b ==5

	gen p_wage12m3 = hh_e39/hh_e40a
	replace p_wage12m3 = p_wage12m3$day1 if hh_e40b ==3
	replace p_wage12m3 = p_wage12m3$week1 if hh_e40b ==4
	replace p_wage12m3 = p_wage12m3$month1 if hh_e40b ==5

	gen p_wage12m4 = hh_e41/hh_e42a
	replace p_wage12m4 = p_wage12m4$day1 if hh_e42b ==3
	replace p_wage12m4 = p_wage12m4$week1 if hh_e42b ==4
	replace p_wage12m4 = p_wage12m4$month1 if hh_e42b ==5

	* Fifth item: product of four reported quantities, scaled with the month12 factor
	gen p_wage12m5 = hh_e56*hh_e57*hh_e58*hh_e59$month12

	* Total wage; missing only when every component is missing
	egen p_wage = rowtotal(p_wage12m*), missing
	drop p_wage12m*

	keep dataset_* p_*
	duplicates drop
	tempfile indchars3
	save `indchars3'


	* Income from agriculture
	* Revenues are saved in tempfiles p_agrev1 to p_agrev9 and costs in p_agcost1 to p_agcost9.
	* Each specification lists the data file, the revenue variable, the cost variable and the
	* person-ID variables among which the amount is split equally. One tempfile is produced per
	* person-ID variable, keyed on household and on that person ID, holding the person total.
	* IDs count as valid when they are above 0 and below 90.
	foreach agkind in rev cost {
		local agk = 0
		foreach agspec in ///
			"AG_MOD_I ag_i03 ag_i10 ag_i06a ag_i06b ag_i06c" ///
			"AG_MOD_O ag_o03 ag_o10 ag_o06a ag_o06b" ///
			"AG_MOD_Q ag_q03 ag_q10 ag_q06a ag_q06b" ///
			"AG_MOD_S ag_s06 ag_s13 ag_s07a ag_s07b" {

			gettoken agfile agrest : agspec
			gettoken agrevvar agrest : agrest
			gettoken agcostvar agmgrs : agrest

			* amount variable for this pass: revenue or cost
			local agvalue "`ag`agkind'var'"

			* expression counting the valid person IDs on a row
			local agden "0"
			foreach agm of local agmgrs {
				local agden "`agden'+(`agm'<90&`agm'>0)"
			}

			foreach agkey of local agmgrs {
				local ++agk

				use  "$origdata/MWI_IHS_2016/1.data/`agfile'.dta", clear
				gen dataset_hhid1 = case_id //for combining
				gen dataset_pno1 = `agkey' //for combining

				* special codes to missing, after the merge key has been copied
				foreach var in `agvalue' `agmgrs' {
					replace `var'=. if inlist(`var', ${missvals_inlist})==1
				}

				* equal split; rows without any valid ID divide by zero and become missing
				gen aux=`agvalue'/(`agden')
				replace aux = aux$month12
				bys dataset_hhid1 dataset_pno1: egen p_ag`agkind'`agk' = sum(aux), missing
				keep dataset_hhid* dataset_pno* p_*
				duplicates drop
				duplicates report dataset_hhid* dataset_pno*
				tempfile p_ag`agkind'`agk'
				save `p_ag`agkind'`agk''
				sum
			}
		}
	}


	* Income from business
	* Tempfiles p_businc1 to p_businc10, each keyed on household and on one owner-ID variable.
	*   1-4  : owner hh_n12a, hh_n14 equal to 2, 3, 4, 5 with weight 0.25, 0.5, 0.75, 1
	*   5-8  : owner hh_n12b, same weights
	*   9-10 : owners hh_n12a and hh_n12b, rows where hh_n13a and hh_n13b are both zero or missing
	* The amount hh_n40 is split equally among valid owner IDs (above 0 and below 90)
	* and scaled with the month1 factor.
	local bizk = 0
	local bizden "(hh_n12a<90&hh_n12a>0)+(hh_n12b<90&hh_n12b>0)"

	foreach bizowner in hh_n12a hh_n12b {
		forvalues bizcode = 2/5 {
			local ++bizk
			* codes 2 to 5 map to weights .25 .5 .75 1
			local bizshare = (`bizcode'-1)/4

			use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_N2.dta", clear
			gen dataset_hhid1 = case_id //for combining
			gen dataset_pno1 = `bizowner' //for combining

			foreach var in hh_n40 hh_n12a hh_n12b hh_n14 {
				replace `var'=. if inlist(`var', ${missvals_inlist})==1
			}
			gen aux=`bizshare'*hh_n40/(`bizden') if hh_n14==`bizcode'
			replace aux = aux$month1
			bys dataset_hhid1 dataset_pno1: egen p_businc`bizk' = sum(aux), missing
			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_businc`bizk'
			save `p_businc`bizk''
			sum
		}
	}

	foreach bizowner in hh_n12a hh_n12b {
		local ++bizk

		use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_N2.dta", clear
		gen dataset_hhid1 = case_id //for combining
		gen dataset_pno1 = `bizowner' //for combining

		foreach var in hh_n40 hh_n12a hh_n12b hh_n14 hh_n13a hh_n13b {
			replace `var'=. if inlist(`var', ${missvals_inlist})==1
		}
		gen aux=hh_n40/(`bizden') if (hh_n13a==0 | hh_n13a==.) & (hh_n13b==0 | hh_n13b==.)
		replace aux = aux$month1
		bys dataset_hhid1 dataset_pno1: egen p_businc`bizk' = sum(aux), missing
		keep dataset_hhid* dataset_pno* p_*
		duplicates drop
		duplicates report dataset_hhid* dataset_pno*
		tempfile p_businc`bizk'
		save `p_businc`bizk''
		sum
	}


	* Other income
	* Tempfiles p_oinc1 to p_oinc10, two per specification (one per person-ID variable).
	* Each specification lists the data file, the recall factor, the two person-ID variables
	* and one or two amount variables. Amounts are split equally among valid IDs
	* (above 0 and below 90), scaled with the recall factor and summed per person.
	local oik = 0
	foreach oispec in ///
		"HH_MOD_P month12 hh_p04a hh_p04b hh_p02" ///
		"HH_MOD_R month12 hh_r05a hh_r05b hh_r02a hh_r02b" ///
		"HH_MOD_O month1 hh_o13_1a hh_o13_1b hh_o13" ///
		"HH_MOD_O month12 hh_o14_1a hh_o14_1b hh_o14" ///
		"HH_MOD_O month12 hh_o18a hh_o18b hh_o17" {

		gettoken oifile oirest : oispec
		gettoken oirecall oirest : oirest
		gettoken oiid1 oirest : oirest
		gettoken oiid2 oiamts : oirest
		local oiamts : list retokenize oiamts
		local oinamt : word count `oiamts'
		local oiden "(`oiid1'<90&`oiid1'>0)+(`oiid2'<90&`oiid2'>0)"

		foreach oikey in `oiid1' `oiid2' {
			local ++oik

			use  "$origdata/MWI_IHS_2016/1.data/`oifile'.dta", clear
			gen dataset_hhid1 = case_id //for combining
			gen dataset_pno1 = `oikey' //for combining

			* special codes to missing, after the merge key has been copied
			foreach var in `oiamts' `oiid1' `oiid2' {
				replace `var'=. if inlist(`var', ${missvals_inlist})==1
			}

			if `oinamt'==1 {
				gen aux=`oiamts'/(`oiden')
				replace aux = aux${`oirecall'}
			}
			else {
				* several amounts: convert each one, then add them up row by row
				local oij = 0
				local oiparts ""
				foreach oiamt of local oiamts {
					local ++oij
					gen aux`oij'=`oiamt'/(`oiden')
					replace aux`oij' = aux`oij'${`oirecall'}
					local oiparts "`oiparts' aux`oij'"
				}
				egen aux = rowtotal(`oiparts'), missing
			}

			bys dataset_hhid1 dataset_pno1: egen p_oinc`oik' = sum(aux), missing
			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_oinc`oik'
			save `p_oinc`oik''
			sum
		}
	}


	* Combine all individual-level data
	* The person list is the master file; rows found only in a using file are never added.
	use `pidvars', clear

	merge m:1 dataset_hhid* using `hhidvars', keep(master match) nogenerate

	foreach part in indchars1 indchars2 indchars3 {
		merge 1:1 dataset_hhid* dataset_pno* using ``part'', keep(master match) nogenerate
	}

	forval x=1/9 {
		merge 1:1 dataset_hhid* dataset_pno* using `p_agrev`x'', keep(master match) nogenerate
		merge 1:1 dataset_hhid* dataset_pno* using `p_agcost`x'', keep(master match) nogenerate
	}

	forval x=1/10 {
		merge 1:1 dataset_hhid* dataset_pno* using `p_businc`x'', keep(master match) nogenerate
		merge 1:1 dataset_hhid* dataset_pno* using `p_oinc`x'', keep(master match) nogenerate
	}



	* Individual income from agriculture: revenues minus costs, floored at zero
	egen p_agrev = rowtotal(p_agrev?), missing
	egen p_agcost = rowtotal(p_agcost?), missing
	drop p_agrev? p_agcost?

	gen p_aginc=.
	replace p_aginc = p_agrev 
	replace p_aginc = p_aginc - p_agcost if p_agcost<.
	replace p_aginc = 0 if p_aginc<0
	drop p_agrev p_agcost

	* Individual income from non-agricultural business activities
	* The ? wildcard matches exactly one character, so the ?? pattern is needed to
	* include and drop the tenth component as well.
	egen p_businc = rowtotal(p_businc? p_businc??), missing
	replace p_businc = 0 if p_businc<0
	drop p_businc? p_businc??

	* Other individual income, all ten components
	egen p_oinc = rowtotal(p_oinc? p_oinc??), missing
	drop p_oinc? p_oinc??

	* Total individual income; non-working people without any reported income get zero
	egen p_inc = rowtotal(p_wage p_aginc p_businc p_oinc), missing
	replace p_inc = 0 if p_working==0 & p_inc>=.


	* Converting individual variables to HH-level
	drop if p_absent==1 //keep only HH members, drop those who left or died

	* Household size: number of non-missing person numbers per household
	bys hhid: egen hh_size=count(pno)

	* Number of adults, children and adult children
	foreach grp in adult child adultchild {
		bys hhid : egen hh_n`grp' = sum(p_`grp'), missing
	}

	* Number of men, women, boys and girls.
	* Each count uses an indicator that is missing when the age class or the
	* sex of the person is missing.
	local cls_male adult
	local cls_female adult
	local cls_boy child
	local cls_girl child
	local sex_male 0
	local sex_female 1
	local sex_boy 0
	local sex_girl 1
	foreach who in male female boy girl {
		cap drop aux
		gen aux = (p_`cls_`who''==1 & p_female==`sex_`who'') if p_`cls_`who''<. & p_female<.
		bys hhid: egen hh_n`who' = sum(aux), missing
	}

	* Share of boys among children (missing when the household has no children)
	gen hh_pboy=hh_nboy/hh_nchild

	* Share of women among adults (missing when the household has no adults)
	gen hh_pfemale = hh_nfemale/hh_nadult

	* Mean, maximum and minimum age per household within each demographic group:
	* a = adults, f = adult women, m = adult men,
	* c = children, g = girls, b = boys
	local in_a "p_adult==1"
	local in_f "p_adult==1 & p_female==1"
	local in_m "p_adult==1 & p_female==0"
	local in_c "p_child==1"
	local in_g "p_child==1 & p_female==1"
	local in_b "p_child==1 & p_female==0"
	foreach grp in a f m c g b {
		cap drop aux
		gen aux = p_agey if `in_`grp''
		foreach stat in mean max min {
			bys hhid: egen hh_`stat'age_`grp' = `stat'(aux)
		}
	}

	* Age of the youngest member in the household
	bys hhid: egen hh_minage = min(p_agey)

	* Child age threshold in use, carried to household level
	bys hhid: egen hh_childage = max(p_childage)


	* Household shares by adult group: a = all adults, f = adult women, m = adult men.
	* Each share is the within-household sum of a person-level indicator over
	* the group, divided by the head count of that group.
	local who_a "p_adult==1"
	local who_f "p_female==1 & p_adult==1"
	local who_m "p_female==0 & p_adult==1"
	local den_a hh_nadult
	local den_f hh_nfemale
	local den_m hh_nmale

	* Education: share of women and of men with no education, tertiary education
	foreach x in noeduc eductert {
		foreach grp in f m {
			cap drop aux
			cap drop auxsum
			gen aux = p_`x' if `who_`grp''
			bys hhid: egen auxsum = sum(aux), missing
			gen hh_p`x'_`grp' = auxsum/`den_`grp''
		}
	}

	* Employment: share of adults, women and men who are employees,
	* self-employed, or working
	foreach x in employee selfempl working {
		foreach grp in a f m {
			cap drop aux
			cap drop auxsum
			gen aux = p_`x' if `who_`grp''
			bys hhid: egen auxsum = sum(aux), missing
			gen hh_p`x'_`grp'=auxsum/`den_`grp''
		}
	}

	* Household head characteristics, copied to every row of the household.
	* For each listed person-level variable, the value is kept only on the
	* head's row and the within-household maximum is stored as hh_<var>_h.
	foreach x in agey age40 female noeduc eductert married single divorced widow separated {
		cap drop aux
		gen aux = p_`x' if p_head==1
		bys hhid: egen hh_`x'_h = max(aux)
			
	}
	* Language of the household head: take the head's value, then fill the other
	* rows of the household with the last value in sort order.
	* NOTE(review): the [_N] fill picks a non-missing value only if missing
	* values sort first, i.e. if p_language is a string - confirm.
	gen hh_language_h=p_language if p_head==1
	bys hhid (hh_language_h): replace hh_language_h = hh_language_h[_N] if missing(hh_language_h)
	** if the HH head did not report a language, take any non-missing value within the HH (very few cases)
	cap drop aux 
	gen aux = p_language
	* aux: own language, or the last-sorted language in the household when own is missing
	bys hhid (aux): replace aux = aux[_N] if missing(aux)
	* put that value on the head's row, then spread it to rows still missing
	bys hhid (aux): replace hh_language_h=aux if p_head==1 
	bys hhid (hh_language_h): replace hh_language_h = hh_language_h[_N] if missing(hh_language_h)
	drop aux
			
	* Sum of individual incomes within the household, for wages and for total income:
	* hh_<x> over all members, hh_<x>_f over adult women, hh_<x>_m over adult men
	foreach x in wage inc {
		* all members
		bys hhid: egen hh_`x' = sum(p_`x') , missing

		* adult women (f), then adult men (m)
		foreach grp in f m {
			local sexval = cond("`grp'"=="f", 1, 0)
			cap drop aux
			gen aux = p_`x' if p_female==`sexval' & p_adult==1
			bys hhid: egen hh_`x'_`grp' = sum(aux) , missing
			drop aux
		}
	}

	* Adult-equivalent children in the household.
	* Each child gets a weight by age band; from age 11 the weight also depends on sex.
	* Persons with p_child equal to zero get weight zero.
	generate p_ae = .

	* bands that do not depend on sex: lower age, upper age, weight
	foreach band in "0 5 0.68" "6 10 0.71" {
		local lo : word 1 of `band'
		local hi : word 2 of `band'
		local wt : word 3 of `band'
		replace p_ae = `wt'*p_child if inrange(p_agey,`lo',`hi')
	}

	* sex-specific bands: lower age, upper age, weight, p_female value (0 boys, 1 girls)
	foreach band in "11 15 0.91 0" "16 17 1.07 0" "11 15 0.88 1" "16 17 0.83 1" {
		local lo : word 1 of `band'
		local hi : word 2 of `band'
		local wt : word 3 of `band'
		local sx : word 4 of `band'
		replace p_ae = `wt'*p_child if inrange(p_agey,`lo',`hi') & p_female==`sx'
	}

	replace p_ae= 0 if p_child==0

	* household total of the adult-equivalent weights
	bys hhid: egen hh_nae = sum(p_ae), missing

	** Reporting checks on age, gender and relationship to the HH head.
	* Three household-level flags are created here: nohead, agemiss, gendermiss.

	* nohead: no member is recorded as head (household maximum of p_head is 0 or missing)
	cap drop aux
	bys hhid: egen aux=max(p_head)
	gen nohead=inlist(aux, 0, .)
	drop aux

	* agemiss and gendermiss: at least one member has a missing age or a missing gender
	local flag_agey agemiss
	local flag_female gendermiss
	foreach v in agey female {
		gen aux = p_`v'>=.
		bys hhid: egen `flag_`v'' = max(aux)
		drop aux
	}

	* Remove the person-level variables and collapse to distinct rows
	drop pno p_* dataset_pno*
	duplicates drop

	tempfile hhdemvars
	save `hhdemvars'


	********************************************************************************
	* Household-level characteristics
	********************************************************************************
	* Urban dummy: 1 when reside equals 1, 0 for other non-missing values
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_A_FILT.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen hh_urban = (reside==1) if !missing(reside)
	keep dataset_hhid* hh_urban
	duplicates drop
	tempfile hhvars1
	save `hhvars1'

	* House ownership: 1 when hh_f01 is 1 or 2, 0 for other non-missing values
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_F.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen hh_homeown = inlist(hh_f01,1,2) if !missing(hh_f01)
	keep dataset_hhid* hh_homeown
	duplicates drop
	tempfile hhvars2
	save `hhvars2'

	* Land ownership: 1 when either ag_b101b or ag_i101b equals 1
	use  "$origdata/MWI_IHS_2016/1.data/AG_MOD_A.dta", clear
	gen dataset_hhid1 = case_id //for combining
	local k = 0
	foreach src in ag_b101b ag_i101b {
		local ++k
		gen hh_landown`k' = (`src'==1) if !missing(`src')
	}
	egen hh_landown = rowmax(hh_landown1 hh_landown2)
	drop hh_landown1 hh_landown2
	keep dataset_hhid* hh_landown
	duplicates drop
	tempfile hhvars3
	save `hhvars3'
				
	* Household-level agricultural income
	** revenues: ag_r17 plus (ag_r17 divided by ag_r16) times ag_r19, summed per household
	use  "$origdata/MWI_IHS_2016/1.data/AG_MOD_R1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	foreach var in ag_r17 ag_r16 ag_r19 {
		replace `var'=. if inlist(`var', ${missvals_inlist})
	}
	gen aux1=ag_r17$month12
	gen aux2=(ag_r17/ag_r16)*ag_r19$month12
	egen aux = rowtotal(aux1 aux2), missing
	bys dataset_hhid1 : egen hh_hhagrev = sum(aux), missing
	keep dataset_* hh_hhagrev
	duplicates drop 
	tempfile hh_hhagrev
	save `hh_hhagrev'

	** costs: five cost items, cleaned of special missing codes, summed per household
	use  "$origdata/MWI_IHS_2016/1.data/AG_MOD_R2.dta", clear
	gen dataset_hhid1 = case_id //for combining
	local k = 0
	foreach var in ag_r27 ag_r28 ag_r29 ag_r25 ag_r26 {
		local ++k
		replace `var'=. if inlist(`var', ${missvals_inlist})
		gen aux`k' = `var'$month12
	}
	egen aux = rowtotal(aux1 aux2 aux3 aux4 aux5), missing
	bys dataset_hhid1 : egen hh_hhagcost = sum(aux), missing
	keep dataset_* hh_hhagcost
	duplicates drop 
	tempfile hh_hhagcost
	save `hh_hhagcost'


	* Latitude and longitude, copied from the modified coordinates in the geovariables file
	use  "$origdata/MWI_IHS_2016/1.data/HouseholdGeovariablesIHS4.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen lat = lat_modified
	gen lon = lon_modified
	keep dataset_hhid* lat lon
	duplicates drop 
	tempfile gps
	save `gps' 
	 


	********************************************************************************
	* Consumption expenditure
	********************************************************************************

	* Clothing
	**************
	* Household clothing expenditure for men (m), women (f) and children (c).
	* Each group is a list of item codes in hh_j02; amounts come from hh_j03.
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_J.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_j02
	replace hh_j03=. if inlist(hh_j03, ${missvals_inlist})==1

	local items_m 308, 309, 310, 311, 312, 323
	local items_f 317, 318, 319, 320, 321, 325
	local items_c 301, 302, 303, 304, 305, 306, 307, 313, 314, 315, 316, 322, 324

	foreach grp in m f c {
		* flag the rows whose item code belongs to the group
		cap drop aux
		cap drop touse
		gen touse = inlist(prcodevar, `items_`grp'')

		* scaled amount for flagged rows, winsorized, then summed per household
		gen aux=hh_j03$month3  if touse==1
		winsor2 aux, replace cuts($expoutcutoff)	
		bys dataset_hhid1 : egen hh_cloth_`grp'= sum(aux)
	}

	keep dataset_* hh_cloth*
	duplicates drop

	tempfile hh_cloth
	save `hh_cloth'



	* Food
	*********
	** food price: unit value hh_g05 over hh_g04a, by household, item code and unit
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_G1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_g02
	gen unitvar = hh_g04b 
	foreach var in hh_g05 hh_g04a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux = hh_g05/hh_g04a
	*** median price per household, item code and unit, in case an item appears more than once
	bys dataset_hhid1 prcodevar unitvar: egen price= median(aux)
	keep dataset_hhid1 prcodevar unitvar price
	duplicates drop

	*** fill zero or missing prices with regional medians, starting at the
	*** highest-numbered region variable that exists and moving down to region1
	merge m:1 dataset_hhid1 using `hhidvars', nogen

	local regionlowest ""
	forvalues lvl = 5(-1)1 {
		cap confirm var region`lvl' 
		if _rc continue
		local region "region`lvl'"
		cap drop price_p50
		bys `region' prcodevar  unitvar :  egen price_p50 = median(price)
		replace price=price_p50 if (price==0 | price>=.) & price_p50<.
		drop price_p50
	}

	keep dataset_hhid1 prcodevar unitvar price 
	duplicates drop
	tempfile foodp
	save `foodp'

	** food consumption: quantity hh_g03a valued at the price file built above
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_G1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_g02
	gen unitvar = hh_g03b
	replace hh_g03a=. if inlist(hh_g03a, ${missvals_inlist})==1

	* prices are matched on household, item code and unit
	merge m:1 dataset_hhid1 prcodevar unitvar using `foodp', nogen

	* value of consumption, winsorized, scaled, then summed per household
	cap drop aux
	gen aux = price*hh_g03a
	winsor2 aux, replace cuts($expoutcutoff) 
	replace aux = aux$week1
	bys dataset_hhid1 : egen hh_food= sum(aux)

	keep dataset_* hh_food
	duplicates drop
	tempfile hh_food
	save `hh_food'

	* Non-food public 
	********************
	* Part 1: items 101 102 104 105 of module I1 (amount in hh_i03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_I1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_i02
	replace hh_i03=. if inlist(hh_i03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 101, 102, 104, 105)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_i03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$week1
	bys dataset_hhid1 : egen hh_nfoodpub1= sum(aux)
	keep dataset_* hh_nfoodpub1
	duplicates drop
	tempfile hh_nfoodpub1
	save `hh_nfoodpub1'


	* Non-food public, part 2: items 201 209 215 216 217 219 of module I2 (amount in hh_i06)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_I2.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_i05
	replace hh_i06=. if inlist(hh_i06, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 201, 209, 215, 216, 217, 219)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_i06  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month1
	bys dataset_hhid1 : egen hh_nfoodpub2= sum(aux)
	keep dataset_* hh_nfoodpub2
	duplicates drop
	tempfile hh_nfoodpub2
	save `hh_nfoodpub2'


	* Non-food public, part 3: items 328 329 330 331 333 338 of module J (amount in hh_j03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_J.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_j02
	replace hh_j03=. if inlist(hh_j03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 328, 329, 330, 331, 333, 338)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_j03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month3
	bys dataset_hhid1 : egen hh_nfoodpub3= sum(aux)
	keep dataset_* hh_nfoodpub3
	duplicates drop
	tempfile hh_nfoodpub3
	save `hh_nfoodpub3'


	* Non-food public, part 4: items 402 403 404 408 409 410 of module K1 (amount in hh_k03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_K1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 402, 403, 404, 408, 409, 410)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_k03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12
	bys dataset_hhid1 : egen hh_nfoodpub4= sum(aux)
	keep dataset_* hh_nfoodpub4
	duplicates drop
	tempfile hh_nfoodpub4
	save `hh_nfoodpub4'

	* Non-food public, part 5: items 419 420 of module K2 (amount in hh_k03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_K2.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 419, 420)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_k03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12
	bys dataset_hhid1 : egen hh_nfoodpub5= sum(aux)
	keep dataset_* hh_nfoodpub5
	duplicates drop
	tempfile hh_nfoodpub5
	save `hh_nfoodpub5'


	* Non-food private 
	********************
	* Part 1: items 103 106 107 108 109 of module I1 (amount in hh_i03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_I1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_i02
	replace hh_i03=. if inlist(hh_i03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 103, 106, 107, 108, 109)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_i03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$week1
	bys dataset_hhid1 : egen hh_nfoodpriv1= sum(aux)
	keep dataset_* hh_nfoodpriv1
	duplicates drop
	tempfile hh_nfoodpriv1
	save `hh_nfoodpriv1'


	* Non-food private, part 2: selected items of module I2 (amount in hh_i06)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_I2.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_i05
	replace hh_i06=. if inlist(hh_i06, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 202, 203, 204, 205, 206, 207, 210, 211, 212, 213, 214, 218, 220, 221)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_i06  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month1
	bys dataset_hhid1 : egen hh_nfoodpriv2= sum(aux)
	keep dataset_* hh_nfoodpriv2
	duplicates drop
	tempfile hh_nfoodpriv2
	save `hh_nfoodpriv2'


	* Non-food private, part 3: selected items of module J (amount in hh_j03).
	* The item list runs through the 300-series codes; code 316 was previously
	* typed as 16, which matched no item and left 316 out of the total.
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_J.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_j02
	foreach var in hh_j03  {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse=0
	foreach x in 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 332 334 335 336 337 339 {
		replace touse = 1 if prcodevar==`x'
	} 
	* amount for flagged rows, winsorized
	local i=1
	foreach var in hh_j03 {
		cap drop aux`i'
		gen aux`i'=`var'  if touse==1
		winsor2 aux`i', replace cuts($expoutcutoff) //trim
		local i=`i'+1
	}
	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month3
	bys dataset_hhid1 : egen hh_nfoodpriv3= sum(aux)
	keep dataset_* hh_nfoodpriv3
	duplicates drop
	tempfile hh_nfoodpriv3
	save `hh_nfoodpriv3'


	* Non-food private, part 4: items 411 412 414 of module K1 (amount in hh_k03)
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_K1.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})==1

	* flag the rows whose item code is in the list
	cap drop aux
	cap drop touse
	gen touse = inlist(prcodevar, 411, 412, 414)

	* amount for flagged rows, winsorized
	cap drop aux1
	gen aux1=hh_k03  if touse==1
	winsor2 aux1, replace cuts($expoutcutoff) //trim

	* scale by the recall factor and sum per household
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12
	bys dataset_hhid1 : egen hh_nfoodpriv4= sum(aux)
	keep dataset_* hh_nfoodpriv4
	duplicates drop
	tempfile hh_nfoodpriv4
	save `hh_nfoodpriv4'


	** health and education
	* Education expenditure: hh_c22j, winsorized, scaled, summed per household
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_C.dta", clear
	gen dataset_hhid1 = case_id //for combining
	replace hh_c22j=. if inlist(hh_c22j, ${missvals_inlist})==1
	gen aux = hh_c22j
	winsor2 aux, replace cuts($expoutcutoff) //trim
	replace aux = aux$month12
	bys dataset_hhid1 : egen hh_educexp= sum(aux)
	keep dataset_* hh_educexp
	duplicates drop
	tempfile hh_educexp
	save `hh_educexp'

	* Health expenditure: three items, each winsorized and scaled separately,
	* added up per row and then summed per household
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_D.dta", clear
	gen dataset_hhid1 = case_id //for combining
	local k = 0
	foreach var in hh_d10 hh_d11 hh_d12 {
		local ++k
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
		cap drop aux`k'
		gen aux`k'=`var'
		winsor2 aux`k', replace cuts($expoutcutoff) //trim
		replace aux`k' = aux`k'$week4
	}
	egen aux = rowtotal(aux1 aux2 aux3), missing
	bys dataset_hhid1 : egen hh_healthexp= sum(aux)
	keep dataset_* hh_healthexp
	duplicates drop
	tempfile hh_healthexp
	save `hh_healthexp'


	* Rent and utilities
	**********************
	use  "$origdata/MWI_IHS_2016/1.data/HH_MOD_F.dta", clear
	gen dataset_hhid1 = case_id //for combining

	** any rent dummy: 1 when hh_f01 equals 6, 0 for other non-missing values
	gen hh_anyrent = (hh_f01==6) if !missing(hh_f01)

	** rent amount: two sources (hh_f03 and hh_f04), each with an amount (suffix a)
	** and a time-unit code (suffix b); unit codes 3 to 6 select the day, week,
	** month and year scaling factors
	local rentsrc1 hh_f03
	local rentsrc2 hh_f04
	forval j=1/2 {
		replace `rentsrc`j''a=. if inlist(`rentsrc`j''a, ${missvals_inlist})==1
	}
	forval j=1/2 {
		gen hh_rent`j' = `rentsrc`j''a
		winsor2 hh_rent`j', replace cuts($expoutcutoff) //trim
		replace hh_rent`j'=hh_rent`j'$day1 if  `rentsrc`j''b==3
		replace hh_rent`j'=hh_rent`j'$week1 if  `rentsrc`j''b==4
		replace hh_rent`j'=hh_rent`j'$month1 if  `rentsrc`j''b==5
		replace hh_rent`j'=hh_rent`j'$year1 if  `rentsrc`j''b==6
	}
	egen hh_rent = rowtotal(hh_rent?), missing
	drop hh_rent?

	** water: hh_f37, winsorized, then scaled
	replace hh_f37=. if inlist(hh_f37, ${missvals_inlist})==1
	gen hh_water = hh_f37
	winsor2 hh_water, replace cuts($expoutcutoff) //trim
	replace hh_water = hh_water$month1


	** electricity
	* Last electricity payment (hh_f25) per unit of time, then annualized.
	* The payment is divided by the length of the period it covers (hh_f26a);
	* hh_f26b is the time-unit code used below to pick the scaling factor.
	* This follows the same a/b layout as the public phone cost, which uses
	* hh_f33a and hh_f33b. Dividing by the unit code itself, as was done
	* before, rescaled the payment by an arbitrary 3 to 6.
	* NOTE(review): assumes hh_f26a holds the number of time units covered by
	* the payment - confirm against the questionnaire.
	foreach var in hh_f25 hh_f26a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_electr = hh_f25/hh_f26a
	winsor2 hh_electr, replace cuts($expoutcutoff) //trim
	replace hh_electr = hh_electr$day1 if hh_f26b==3
	replace hh_electr = hh_electr$week1 if hh_f26b==4
	replace hh_electr = hh_electr$month1 if hh_f26b==5
	replace hh_electr = hh_electr$year1 if hh_f26b==6

	** other energy: hh_f18, winsorized, then scaled
	replace hh_f18=. if inlist(hh_f18, ${missvals_inlist})==1
	gen hh_oenerg = hh_f18
	winsor2 hh_oenerg, replace cuts($expoutcutoff) //trim
	replace hh_oenerg = hh_oenerg$week1


	** phone costs - public: hh_f32 divided by hh_f33a, scaled by the unit code in hh_f33b
	foreach var in hh_f32 hh_f33a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_phonepub = hh_f32/hh_f33a
	winsor2 hh_phonepub, replace cuts($expoutcutoff) //trim
	replace hh_phonepub = hh_phonepub$day1 if hh_f33b==3
	replace hh_phonepub = hh_phonepub$week1 if hh_f33b==4
	replace hh_phonepub = hh_phonepub$month1 if hh_f33b==5
	replace hh_phonepub = hh_phonepub$year1 if hh_f33b==6

	** phone costs - private: hh_f35, winsorized, then scaled
	replace hh_f35=. if inlist(hh_f35, ${missvals_inlist})==1
	gen hh_phonepriv = hh_f35
	winsor2 hh_phonepriv, replace cuts($expoutcutoff) //trim
	replace hh_phonepriv = hh_phonepriv$month1

	* keep the household identifiers and the rent and utility variables
	keep dataset_* hh_anyrent hh_rent hh_water hh_electr hh_oenerg hh_phone*
	duplicates drop 
	tempfile hh_rentutils
	save `hh_rentutils'


	* World Bank aggregate expenditure variables (for poverty calculations)
	*************************************************************************
	* rexp_wb is a copy of rexpagg from the consumption aggregate file
	use  "$origdata/MWI_IHS_2016/1.data/IHS4_Consumption_Aggregate.dta", clear
	gen dataset_hhid1 = case_id //for combining
	gen rexp_wb = rexpagg

	keep dataset_hhid* rexp_wb
	tempfile rexp_wb
	save `rexp_wb'



	********************************************************************************
	* Combine all survey-based variables
	********************************************************************************
	* Start from the household identifiers and keep only households that also
	* appear in the household demographics file
	use `hhidvars', clear
	merge 1:1 dataset_hhid1 using `hhdemvars'
	drop if _merge!=3
	drop _merge

	* Attach every household-level tempfile; rows found only in the using file are discarded
	local hhfiles weight hhvars1 hhvars2 hhvars3 hh_hhagrev hh_hhagcost gps hh_cloth
	local hhfiles `hhfiles' hh_food hh_nfoodpub1 hh_nfoodpub2 hh_nfoodpub3 hh_nfoodpub4 hh_nfoodpub5
	local hhfiles `hhfiles' hh_nfoodpriv1 hh_nfoodpriv2 hh_nfoodpriv3 hh_nfoodpriv4 hh_educexp
	local hhfiles `hhfiles' hh_healthexp hh_rentutils rexp_wb
	foreach src of local hhfiles {
		merge 1:1 dataset_hhid1  using ``src''
		drop if _merge==2
		drop _merge
	}


	* Income from agriculture (household level): revenues minus costs when costs
	* are reported, floored at zero
	gen hh_hhaginc = hh_hhagrev
	replace hh_hhaginc = hh_hhaginc-hh_hhagcost if hh_hhagcost<.
	replace hh_hhaginc=0 if hh_hhaginc<0
	drop hh_hhagcost hh_hhagrev

	* Total HH income
	* hh_hhinc (household-level, non-individualized income) is built and then
	* dropped below, so hh_totinc equals the sum of individual incomes hh_inc only.
	* NOTE(review): household-level agricultural income does not enter hh_totinc - confirm this is intended.
	egen hh_hhinc = rowtotal(hh_hhaginc), missing //non-individualized
	egen hh_totinc = rowtotal(hh_inc), missing
	drop hh_hhaginc hh_hhinc

	* Relative income of women
	** women: income of adult women as a share of total household income
	gen hh_relinc_f = hh_inc_f/hh_totinc

	* Private expenditure
	* rowtotal without the missing option returns 0 when every component is missing
	egen hh_privexp = rowtotal(hh_food hh_nfoodpriv? hh_educexp hh_healthexp hh_phonepriv)
	drop hh_nfoodpriv? hh_educexp hh_healthexp hh_phonepriv

	* Public expenditure
	* rowtotal without the missing option returns 0 when every component is missing
	egen hh_pubexp = rowtotal(hh_nfoodpub? hh_rent hh_water hh_electr hh_oenerg hh_phonepub)
	drop hh_nfoodpub?
	drop hh_rent hh_water hh_electr hh_oenerg hh_phonepub

	* Total expenditure: private plus public
	egen hh_exp = rowtotal(hh_privexp hh_pubexp), missing

	********************************************************************************
	* Corrections
	********************************************************************************
	* Missing household shares and ages are set to zero when the household has
	* nobody in the relevant group. Group letters and their head counts:
	local n_a hh_nadult
	local n_f hh_nfemale
	local n_m hh_nmale
	local n_c hh_nchild
	local n_b hh_nboy
	local n_g hh_ngirl

	* Employment: shares of working, employee and self-employed
	foreach x in pworking pemployee pselfempl {
		foreach grp in a f m {
			replace hh_`x'_`grp'=0 if `n_`grp''==0 & hh_`x'_`grp'>=.
		}
	}

	* Demographics: mean, maximum and minimum age
	foreach x in mean max min {
		foreach grp in a f m c b g {
			replace hh_`x'age_`grp'=0 if `n_`grp''==0 & hh_`x'age_`grp'>=.
		}
	}

	* Share of boys and women
	replace hh_pboy = 0 if hh_nchild==0 & hh_pboy>=.
	replace hh_pfemale = 0 if hh_nadult==0 & hh_pfemale>=.

	* Education: shares with no education and with tertiary education
	foreach x in pnoeduc peductert {
		foreach grp in f m {
			replace hh_`x'_`grp'=0 if `n_`grp''==0 & hh_`x'_`grp'>=.
		}
	}

	* Income variables
	* - total income is zero when no adult works
	replace hh_totinc=0 if hh_pworking_a==0 & hh_totinc>=.

	* - women's income and women's relative income are zero when no woman works,
	*   when there are no women, or when total income is zero
	replace hh_inc_f=0 if (hh_pworking_f==0 | hh_nfemale==0 | hh_totinc==0) & hh_inc_f>=.
	replace hh_relinc_f = 0 if (hh_pworking_f==0 | hh_nfemale==0 | hh_totinc==0) & hh_relinc_f>=.

	* Clothing expenditures (if only purchased items were recorded)
	foreach x in hh_cloth_f hh_cloth_m hh_cloth_c {
		replace `x' = 0 if `x'>=.
	}

	* Sample identifier
	gen sample = "IHS"

	tempfile mwi_ihs
	save `mwi_ihs'
	
}

*------------------------------------------------------------------------------*
*
*					Malawi IHPS 2016 (panel sample)
*
*------------------------------------------------------------------------------*
{
	********************************************************************************
	* Identifiers
	********************************************************************************
	* Regional and HH identifiers
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_a_filt_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen hhid = y3_hhid
	* region2 and region3 come from region and district, with decoded value labels as names
	local src2 region
	local src3 district
	forval r=2/3 {
		gen region`r' = `src`r''
		decode `src`r'', gen(region`r'_name)
	}
	gen region4 = ta_code
	gen region5 = ea_id
	keep dataset_hhid1  hhid region2 region2_name region3 region3_name region4 region5
	duplicates drop
	tempfile hhidvars
	save `hhidvars'

	* Individual identifiers: household id plus person number from id_code
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_b_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = id_code //for combining
	gen pno = id_code
	keep dataset_hhid1 dataset_pno1 pno 
	duplicates drop
	tempfile id_codevars
	save `id_codevars'

	* Weight: sweight is a copy of hh_wgt
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_a_filt_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen sweight = hh_wgt
	keep dataset_hhid1  sweight
	tempfile weight
	save `weight'


	********************************************************************************
	* Household demographic characteristics
	********************************************************************************
	* Person-level demographics from the household roster: age, sex, relation to
	* head, child and adult indicators, language, marital status and absence.
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_b_16.dta", clear
	
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = id_code //for combining

	* Basic demographics
	gen p_agey = hh_b05a
	gen p_agem = hh_b05b
	** many missings in age, recalculate age using date of birth and interview time
	merge m:1 y3_hhid using "$origdata/MWI_IHPS_2016/1.data/hh_mod_a_filt_16.dta", keepusing(interviewdate_v1 interviewdate_v2)
	* interview month and year are read from the date strings, second visit first
	gen intm = substr(interviewdate_v2, 6,2)
	replace intm = substr(interviewdate_v1, 6,2) if intm==""
	gen inty = substr(interviewdate_v2, 1,4)
	replace inty = substr(interviewdate_v1, 1,4) if inty==""
	destring intm inty, replace force
	* age in completed years from birth year hh_b06b and birth month hh_b06a
	* NOTE(review): a missing birth month compares as larger than any interview
	* month, so such persons take the minus-one branch - confirm this is intended.
	gen ageyaux = inty-hh_b06b if intm>=hh_b06a
	replace ageyaux = inty-hh_b06b-1 if intm<hh_b06a
	* age in months, only for persons whose computed age in years is zero
	gen agemaux = intm-hh_b06a  if inty==hh_b06b & ageyaux==0
	replace agemaux = intm+(12-hh_b06a) if (inty-1)==hh_b06b & ageyaux==0
	replace p_agey =ageyaux if p_agey==.
	replace p_agem = agemaux if p_agem==.
	* compare reported and recalculated ages; the auxiliary variables are named
	* in full so the check does not depend on variable abbreviation
	sum p_agey ageyaux
	sum p_agem agemaux
	
	* infants: express age in years as months over 12 when years are missing or zero
	replace p_agey=p_agem/12 if (p_agey>=.|p_agey==0)  & p_agem<.

	gen p_age40=p_agey>40 if p_agey<.
	gen p_female = hh_b03==2 if hh_b03<.
	gen p_head = hh_b04==1 if hh_b04<.
	gen p_spouse = hh_b04==2 if hh_b04<.

	* children are persons aged p_childage or younger, adults are older
	gen p_childage = 16 //defined according to child clothing
	gen p_child = 0 if p_agey<.
	replace p_child = 1 if p_agey<=p_childage & p_agey<.

	gen p_adult = 0 if p_agey<.
	replace p_adult = 1 if p_agey>p_childage & p_agey<.

	gen p_adult18 = 0 if p_agey<.
	replace p_adult18 = 1 if p_agey>=18 & p_agey<.

	* adult children: older than p_childage but younger than 18
	gen p_adultchild=0 if p_agey<.
	replace p_adultchild=1 if p_agey>p_childage & p_agey<18

	* Language spoken group: decoded value label of hh_b22, in lower case
	decode  hh_b22, gen(p_language) 
	replace p_language=lower(p_language)

	* Marital status from hh_b24; missing and code 9 are left missing
	gen p_married =  inlist(hh_b24,1,2)==1 if !inlist(hh_b24, .,9) 
	gen p_single =  hh_b24==6 if !inlist(hh_b24, .,9) 
	gen p_divorced =  hh_b24==4 if !inlist(hh_b24, .,9) 
	gen p_widow =  hh_b24==5 if !inlist(hh_b24, .,9) 
	gen p_separated =  hh_b24==3 if !inlist(hh_b24, .,9) 

	* Absence: hh_b06_2 equal to 3 or 4; missing and code 99 are left missing
	gen p_absent = inlist(hh_b06_2, 3,4)==1 if !inlist(hh_b06_2, .,99)
	keep dataset_* p_*
	tempfile indchars1
	save `indchars1'

	* Education: no-education dummy, years of education and tertiary dummy
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_c_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = id_code //for combining

	* raw indicators
	gen p_noeduc = (hh_c06==2) if !missing(hh_c06)
	gen p_educyrs = hh_c08 
	gen p_eductert = inlist(hh_c09, 6,7) if !missing(hh_c09)

	* persons with no education get zero years and no tertiary education when
	* those are missing; the order of these steps matters
	replace p_eductert=0 if p_noeduc==1 & missing(p_eductert)
	replace p_educyrs =0 if p_noeduc==1 & missing(p_educyrs)

	* the no-education dummy is then aligned with the years of education
	replace p_noeduc=1 if p_educyrs==0 
	replace p_noeduc=0 if p_educyrs>0 & !missing(p_educyrs)

	keep dataset_* p_*
	duplicates drop
	tempfile indchars2
	save `indchars2'


	* Employment
	* Activity type is read from two fields for the last 7 days (hh_e13_1a,
	* hh_e13_1b) and two fields for the last 12 months (hh_e06_8a, hh_e06_8b).
	* Codes 1 and 5 are treated as employee, codes 2 and 3 as self-employed.
	* The 7-day self-employment dummies previously reused the employee codes,
	* which made them identical to the 7-day employee dummies; they now use
	* codes 2 and 3 like the 12-month version.
	* NOTE(review): assumes the 7-day and 12-month fields share one code list - confirm.
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_e_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = id_code //for combining

	** employee
	gen p_employee7d1 = inlist(hh_e13_1a, 1,5) if hh_e13_1a<.
	gen p_employee7d2 = inlist(hh_e13_1b, 1,5) if hh_e13_1b<.
	gen p_employee12m1 = inlist(hh_e06_8a, 1,5) if hh_e06_8a<.
	gen p_employee12m2 = inlist(hh_e06_8b, 1,5) if hh_e06_8b<.
	egen p_employee7d = rowmax(p_employee7d1 p_employee7d2)
	egen p_employee12m = rowmax(p_employee12m1 p_employee12m2)
	egen p_employee=rowmax(p_employee7d p_employee12m)
	drop p_employee7d1 p_employee7d2
	drop p_employee12m1 p_employee12m2

	** self employment
	gen p_selfempl7d1 = inlist(hh_e13_1a, 2,3) if hh_e13_1a<.
	gen p_selfempl7d2 = inlist(hh_e13_1b, 2,3) if hh_e13_1b<.
	gen p_selfempl12m1 = inlist(hh_e06_8a, 2,3) if hh_e06_8a<.
	gen p_selfempl12m2 = inlist(hh_e06_8b, 2,3) if hh_e06_8b<.
	egen p_selfempl7d = rowmax(p_selfempl7d1 p_selfempl7d2)
	egen p_selfempl12m = rowmax(p_selfempl12m1 p_selfempl12m2)
	egen p_selfempl=rowmax(p_selfempl7d p_selfempl12m)
	drop p_selfempl7d1 p_selfempl7d2
	drop p_selfempl12m1 p_selfempl12m2

	** working population: employee or self-employed, by reference period and overall
	egen p_working12m = rowmax(p_employee12m p_selfempl12m)
	egen p_working7d = rowmax(p_employee7d p_selfempl7d)
	egen p_working = rowmax(p_employee p_selfempl)


	* Wage
	* Four payment items, each an amount divided by the length of the period it
	* covers (suffix a) and scaled by the time-unit code (suffix b: 3 day,
	* 4 week, 5 month), plus a fifth item built as a product of four fields.
	destring hh_e40a hh_e41, replace
	foreach var in hh_e25 hh_e26a hh_e27 hh_e28a hh_e39 hh_e40a hh_e41 hh_e42a hh_e56 hh_e57 hh_e58 hh_e59 {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}

	local pay1 hh_e25
	local per1 hh_e26
	local pay2 hh_e27
	local per2 hh_e28
	local pay3 hh_e39
	local per3 hh_e40
	local pay4 hh_e41
	local per4 hh_e42
	forval j=1/4 {
		gen p_wage12m`j' = `pay`j''/`per`j''a
		replace p_wage12m`j' = p_wage12m`j'$day1 if `per`j''b ==3
		replace p_wage12m`j' = p_wage12m`j'$week1 if `per`j''b ==4
		replace p_wage12m`j' = p_wage12m`j'$month1 if `per`j''b ==5
	}

	gen p_wage12m5 = hh_e56*hh_e57*hh_e58*hh_e59$month12

	* total wage; missing only when every item is missing
	egen p_wage = rowtotal(p_wage12m*), missing
	drop p_wage12m*

	keep dataset_* p_*
	duplicates drop
	tempfile indchars3
	save `indchars3'


	* Income from agriculture
	** revenues
	* Four person-level revenue files are built from the same module. Each one
	* takes an amount (ag_i13 or ag_i22) and splits it over the persons listed in
	* the paired a and b fields (ag_i14 or ag_i23): the divisor counts how many
	* of the two fields hold a value above 0 and below 90. The person number
	* used for merging is the a field in one file and the b field in the next.
	*   p_agrev1: ag_i13 by ag_i14a     p_agrev2: ag_i13 by ag_i14b
	*   p_agrev3: ag_i22 by ag_i23a     p_agrev4: ag_i22 by ag_i23b
	local k = 0
	foreach pairing in "ag_i13 ag_i14" "ag_i22 ag_i23" {
		local amt : word 1 of `pairing'
		local mgr : word 2 of `pairing'
		foreach slot in a b {
			local ++k
			use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_i_16.dta", clear
			gen dataset_hhid1 = y3_hhid //for combining
			gen dataset_pno1 = `mgr'`slot' //for combining

			foreach var in `amt' `mgr'a `mgr'b {
				replace `var'=. if inlist(`var', ${missvals_inlist})==1
			}

			gen aux=`amt'/((`mgr'a<90&`mgr'a>0)+(`mgr'b<90&`mgr'b>0))
			replace aux = aux$month12
			bys dataset_hhid1 dataset_pno1: egen p_agrev`k' = sum(aux), missing
			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_agrev`k'
			save `p_agrev`k''
			sum
		}
	}
	
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_o_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_o14a //for combining

	foreach var in ag_o13 ag_o14a ag_o14b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_o13/((ag_o14a<90&ag_o14a>0)+(ag_o14b<90&ag_o14b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev5 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev5
	save `p_agrev5'
	sum
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_o_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_o14b //for combining

	foreach var in ag_o13 ag_o14a ag_o14b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_o13/((ag_o14a<90&ag_o14a>0)+(ag_o14b<90&ag_o14b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev6 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev6
	save `p_agrev6'
	sum
	
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_q_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_q14a //for combining

	foreach var in ag_q13 ag_q14a ag_q14b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}

	gen aux=ag_q13/((ag_q14a<90&ag_q14a>0)+(ag_q14b<90&ag_q14b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev7 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev7
	save `p_agrev7'
	sum


	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_q_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_q14b //for combining

	foreach var in ag_q13 ag_q14a ag_q14b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}

	gen aux=ag_q13/((ag_q14a<90&ag_q14a>0)+(ag_q14b<90&ag_q14b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev8 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev8
	save `p_agrev8'
	sum
	
	

	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_q_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_q23a //for combining

	foreach var in ag_q22 ag_q23a ag_q23b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_q22/((ag_q23a<90&ag_q23a>0)+(ag_q23b<90&ag_q23b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev9 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev9
	save `p_agrev9'
	sum
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_q_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_q23b //for combining

	foreach var in ag_q22 ag_q23a ag_q23b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_q22/((ag_q23a<90&ag_q23a>0)+(ag_q23b<90&ag_q23b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev10 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev10
	save `p_agrev10'
	sum
	
	
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_s_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_s07a //for combining

	foreach var in ag_s06 ag_s07a ag_s07b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_s06/((ag_s07a<90&ag_s07a>0)+(ag_s07b<90&ag_s07b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev11 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev11
	save `p_agrev11'
	sum
	
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_s_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen dataset_pno1 = ag_s07b //for combining

	foreach var in ag_s06 ag_s07a ag_s07b {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen aux=ag_s06/((ag_s07a<90&ag_s07a>0)+(ag_s07b<90&ag_s07b>0))
	replace aux = aux$month12
	bys dataset_hhid1 dataset_pno1: egen p_agrev12 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_agrev12
	save `p_agrev12'
	sum
	
	** costs
	* Builds tempfiles p_agcost1 ... p_agcost12 with the same recipe as the
	* revenue files: module letter, cost variable, stem of the two person-slot
	* variables. Each spec is processed for slot a and then slot b.
	local agcost_n = 0
	foreach spec in "i ag_i18 ag_i14" "i ag_i27 ag_i23" "o ag_o18 ag_o14" "q ag_q18 ag_q14" "q ag_q27 ag_q23" "s ag_s13 ag_s07" {
		local module : word 1 of `spec'
		local amount : word 2 of `spec'
		local person : word 3 of `spec'

		foreach slot in a b {
			local ++agcost_n

			use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_`module'_16.dta", clear
			gen dataset_hhid1 = y3_hhid //for combining
			gen dataset_pno1 = `person'`slot' //for combining

			* The person key above is taken before special codes are set to missing.
			foreach var in `amount' `person'a `person'b {
				replace `var' = . if inlist(`var', ${missvals_inlist})
			}

			* Split the cost across the slots holding a value strictly between 0 and 90.
			gen aux = `amount'/((`person'a<90 & `person'a>0) + (`person'b<90 & `person'b>0))
			replace aux = aux${month12}
			bys dataset_hhid1 dataset_pno1: egen p_agcost`agcost_n' = sum(aux), missing

			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_agcost`agcost_n'
			save `p_agcost`agcost_n''
			sum
		}
	}


	* Income from business
	* Builds tempfiles p_businc1 ... p_businc10 from hh_mod_n2_16.
	*   1-4 : keyed on hh_n12a, rows with hh_n14 equal to 2, 3, 4, 5
	*   5-8 : keyed on hh_n12b, same four hh_n14 rules
	*   9-10: keyed on hh_n12a then hh_n12b, rows where hh_n13a and hh_n13b
	*         are both zero or missing
	* hh_n40 is multiplied by the rule's fraction, divided by the number of
	* hh_n12 slots holding a value strictly between 0 and 90, and scaled by
	* the month1 factor.
	local businc_n = 0

	foreach slot in a b {
		foreach rule in "2 0.25" "3 0.5" "4 0.75" "5 1" {
			local ++businc_n
			local code : word 1 of `rule'
			local frac : word 2 of `rule'

			use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_n2_16.dta", clear
			gen dataset_hhid1 = y3_hhid //for combining
			gen dataset_pno1 = hh_n12`slot' //for combining

			foreach var in hh_n40 hh_n12a hh_n12b hh_n14 {
				replace `var' = . if inlist(`var', ${missvals_inlist})
			}

			gen aux = `frac'*hh_n40/((hh_n12a<90 & hh_n12a>0) + (hh_n12b<90 & hh_n12b>0)) if hh_n14==`code'
			replace aux = aux${month1}
			bys dataset_hhid1 dataset_pno1: egen p_businc`businc_n' = sum(aux), missing

			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_businc`businc_n'
			save `p_businc`businc_n''
			sum
		}
	}

	* Rows where both hh_n13a and hh_n13b are zero or missing: full hh_n40 amount.
	foreach slot in a b {
		local ++businc_n

		use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_n2_16.dta", clear
		gen dataset_hhid1 = y3_hhid //for combining
		gen dataset_pno1 = hh_n12`slot' //for combining

		foreach var in hh_n40 hh_n12a hh_n12b hh_n14 hh_n13a hh_n13b {
			replace `var' = . if inlist(`var', ${missvals_inlist})
		}

		gen aux = hh_n40/((hh_n12a<90 & hh_n12a>0) + (hh_n12b<90 & hh_n12b>0)) if inlist(hh_n13a, 0, .) & inlist(hh_n13b, 0, .)
		replace aux = aux${month1}
		bys dataset_hhid1 dataset_pno1: egen p_businc`businc_n' = sum(aux), missing

		keep dataset_hhid* dataset_pno* p_*
		duplicates drop
		duplicates report dataset_hhid* dataset_pno*
		tempfile p_businc`businc_n'
		save `p_businc`businc_n''
		sum
	}


	* Other income
	* Builds tempfiles p_oinc1 ... p_oinc10. Each spec lists: module letter,
	* the number selecting the month recall factor (12 uses month12, 1 uses
	* month1), the stem of the two person-slot variables, and one or more
	* amount variables. Every spec is processed for slot a and then slot b.
	* With several amount variables (module r) each one is split and scaled
	* separately and the pieces are added with rowtotal, missing.
	local oinc_n = 0
	foreach spec in "p 12 hh_p04 hh_p02" "r 12 hh_r05 hh_r02a hh_r02b" "o 1 hh_o13_1 hh_o13" "o 12 hh_o14_1 hh_o14" "o 12 hh_o18 hh_o17" {
		gettoken module spec_rest : spec
		gettoken months spec_rest : spec_rest
		gettoken person spec_rest : spec_rest
		local amounts `spec_rest'
		local n_amounts : word count `amounts'

		foreach slot in a b {
			local ++oinc_n

			use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_`module'_16.dta", clear
			gen dataset_hhid1 = y3_hhid //for combining
			gen dataset_pno1 = `person'`slot' //for combining

			* The person key above is taken before special codes are set to missing.
			foreach var in `amounts' `person'a `person'b {
				replace `var' = . if inlist(`var', ${missvals_inlist})
			}

			if `n_amounts'==1 {
				gen aux = `amounts'/((`person'a<90 & `person'a>0) + (`person'b<90 & `person'b>0))
				replace aux = aux${month`months'}
			}
			else {
				local pieces
				local j = 0
				foreach amt of local amounts {
					local ++j
					gen aux`j' = `amt'/((`person'a<90 & `person'a>0) + (`person'b<90 & `person'b>0))
					replace aux`j' = aux`j'${month`months'}
					local pieces `pieces' aux`j'
				}
				egen aux = rowtotal(`pieces'), missing
			}
			bys dataset_hhid1 dataset_pno1: egen p_oinc`oinc_n' = sum(aux), missing

			keep dataset_hhid* dataset_pno* p_*
			duplicates drop
			duplicates report dataset_hhid* dataset_pno*
			tempfile p_oinc`oinc_n'
			save `p_oinc`oinc_n''
			sum
		}
	}


	* Combine all individual-level data
	* Start from the person roster and attach household ids, the three
	* person-characteristics files and every income-component tempfile.
	* Only roster rows are kept: using-only rows (_merge==2) are dropped.
	use `id_codevars', clear

	merge m:1 dataset_hhid* using `hhidvars'
	drop if _merge==2
	drop _merge

	foreach f in indchars1 indchars2 indchars3 {
		merge 1:1 dataset_hhid* dataset_pno* using ``f''
		drop if _merge==2
		drop _merge
	}

	* Agricultural revenue and cost components.
	* FIX: twelve p_agrev and twelve p_agcost tempfiles are saved above, but the
	* loop used to stop at 9, so components 10-12 never reached the totals.
	* The component names are collected explicitly so that two-digit suffixes
	* are summed and dropped as well.
	local n_agparts = 12
	local agrev_parts
	local agcost_parts
	forval x=1/`n_agparts' {
		merge 1:1 dataset_hhid* dataset_pno* using `p_agrev`x''
		drop if _merge==2
		drop _merge
		local agrev_parts `agrev_parts' p_agrev`x'

		merge 1:1 dataset_hhid* dataset_pno* using `p_agcost`x''
		drop if _merge==2
		drop _merge
		local agcost_parts `agcost_parts' p_agcost`x'
	}

	* Business and other income components (ten tempfiles each).
	local n_othparts = 10
	local businc_parts
	local oinc_parts
	forval x=1/`n_othparts' {
		merge 1:1 dataset_hhid* dataset_pno* using `p_businc`x''
		drop if _merge==2
		drop _merge
		local businc_parts `businc_parts' p_businc`x'

		merge 1:1 dataset_hhid* dataset_pno* using `p_oinc`x''
		drop if _merge==2
		drop _merge
		local oinc_parts `oinc_parts' p_oinc`x'
	}


	* Individual income from agricultural activities: revenue minus cost
	* (cost subtracted only when non-missing), floored at zero.
	egen p_agrev = rowtotal(`agrev_parts'), missing
	egen p_agcost = rowtotal(`agcost_parts'), missing
	drop `agrev_parts' `agcost_parts'

	gen p_aginc=.
	replace p_aginc = p_agrev 
	replace p_aginc = p_aginc - p_agcost if p_agcost<.
	replace p_aginc = 0 if p_aginc<0
	drop p_agrev p_agcost

	* Individual income from business activities, floored at zero.
	* FIX: the former wildcard with a single "?" matched one-character suffixes
	* only, so p_businc10 (and p_oinc10 below) were merged but never summed.
	egen p_businc = rowtotal(`businc_parts'), missing
	replace p_businc = 0 if p_businc<0
	drop `businc_parts'

	* Other individual income.
	egen p_oinc = rowtotal(`oinc_parts'), missing
	drop `oinc_parts'

	* Total individual income; set to zero for non-working persons with no
	* reported income component.
	egen p_inc = rowtotal(p_wage p_aginc p_businc p_oinc), missing
	replace p_inc = 0 if p_working==0 & p_inc>=.


	* Converting individual variables to HH-level
	drop if p_absent==1 //keep only HH members, drop those who left or died

	* Household size: count of rows with a non-missing pno within hhid
	bys hhid: egen hh_size=count(pno)

	* Number of adults and children: household sums of the person-level flags
	foreach grp in adult child adultchild {
		bys hhid : egen hh_n`grp' = sum(p_`grp'), missing
	}

	* Number of men, women, boys and girls
	* Each spec is: output suffix, age-group flag, required value of p_female.
	* The indicator is left missing when either flag is missing.
	foreach spec in "male adult 0" "female adult 1" "boy child 0" "girl child 1" {
		local tag  : word 1 of `spec'
		local flag : word 2 of `spec'
		local fem  : word 3 of `spec'
		cap drop aux
		gen aux = (p_`flag'==1 & p_female==`fem') if p_`flag'<. & p_female<.
		bys hhid: egen hh_n`tag' = sum(aux), missing
	}

	* Share of boys among children (missing when the household has no children)
	gen hh_pboy = hh_nboy/hh_nchild
	*replace hh_pboy=0 if hh_nchild==0

	* Share of women among adults (missing when the household has no adults)
	gen hh_pfemale = hh_nfemale/hh_nadult
	*replace hh_pfemale=0 if hh_nadult==0 

	* Mean/maximum/minimum age by demographic groups
	* Each entry is an output suffix followed by the condition selecting the group:
	*   a adults, f adult women, m adult men, c children, g girls, b boys.
	foreach grp in "a p_adult==1" "f p_adult==1 & p_female==1" "m p_adult==1 & p_female==0" "c p_child==1" "g p_child==1 & p_female==1" "b p_child==1 & p_female==0" {
		gettoken sfx cond : grp
		cap drop aux
		gen aux = p_agey if `cond'
		foreach stat in mean max min {
			bys hhid: egen hh_`stat'age_`sfx' = `stat'(aux)
		}
	}

	* Age of youngest member/reported child in HH
	bys hhid: egen hh_minage = min(p_agey)

	* Child age used now: household maximum of p_childage
	bys hhid: egen hh_childage = max(p_childage)


	* Education by adult demographic groups
	* Share of adult women / adult men with no education and with tertiary
	* education: household sum of the flag within the group, divided by the
	* number of adult women (hh_nfemale) or adult men (hh_nmale).
	foreach x in noeduc eductert {
		foreach grp in "f hh_nfemale p_female==1 & p_adult==1" "m hh_nmale p_female==0 & p_adult==1" {
			gettoken sfx grp_rest : grp
			gettoken base cond : grp_rest

			cap drop aux
			cap drop auxsum
			gen aux = p_`x' if `cond'
			bys hhid: egen auxsum = sum(aux), missing
			gen hh_p`x'_`sfx' = auxsum/`base'
		}
	}


	* Employment by adult demographic groups
	* Share of adults, adult women and adult men who are employees,
	* self-employed or working; denominators are hh_nadult, hh_nfemale, hh_nmale.
	foreach x in employee selfempl working {
		foreach grp in "a hh_nadult p_adult==1" "f hh_nfemale p_female==1 & p_adult==1" "m hh_nmale p_female==0 & p_adult==1" {
			gettoken sfx grp_rest : grp
			gettoken base cond : grp_rest

			cap drop aux
			cap drop auxsum
			gen aux = p_`x' if `cond'
			bys hhid: egen auxsum = sum(aux), missing
			gen hh_p`x'_`sfx' = auxsum/`base'
		}
	}

	* Household head
	* For each listed characteristic, copy the value of the person flagged
	* p_head==1 to every row of the household (household max of the head-only copy).
	foreach x in agey age40 female noeduc eductert married single divorced widow separated {
		cap drop aux
		gen aux = p_`x' if p_head==1
		bys hhid: egen hh_`x'_h = max(aux)
			
	}
	* Language of the household head, spread to all rows of the household.
	* NOTE(review): taking the last sorted value [_N] picks a non-missing value
	* only if p_language is a string (empty strings sort first). If it is
	* numeric, missing sorts last and [_N] is missing - confirm the storage type.
	gen hh_language_h=p_language if p_head==1
	bys hhid (hh_language_h): replace hh_language_h = hh_language_h[_N] if missing(hh_language_h)
	** if HH head didn't report language, take any non-missing value within HH (very few cases)
	cap drop aux 
	gen aux = p_language
	* Fill aux within the household, then assign it to the head row and spread again.
	bys hhid (aux): replace aux = aux[_N] if missing(aux)
	bys hhid (aux): replace hh_language_h=aux if p_head==1 
	bys hhid (hh_language_h): replace hh_language_h = hh_language_h[_N] if missing(hh_language_h)
	drop aux
			
	* Sum of individual incomes
	* For wage and total income: household total over all rows, then totals
	* restricted to adult women (suffix f) and adult men (suffix m).
	foreach x in wage inc {
		* HH income
		bys hhid: egen hh_`x' = sum(p_`x') , missing

		* Women's income (p_female==1), then men's income (p_female==0)
		foreach sex in "f 1" "m 0" {
			local sfx : word 1 of `sex'
			local fem : word 2 of `sex'
			cap drop aux
			gen aux = p_`x' if p_female==`fem' & p_adult==1
			bys hhid: egen hh_`x'_`sfx' = sum(aux) , missing
			drop aux
		}
	}

	* Adult equivalent children in HH: household sum of per-child weights.
	* NOTE(review): the original remark expects hh_nae below hh_nchild, but the
	* 1.07 weight for boys aged 16-17 allows a larger value - confirm intent.
	generate p_ae = .

	* Ages 0-10: one weight for both sexes (age from, age to, weight)
	foreach rule in "0 5 0.68" "6 10 0.71" {
		local lo : word 1 of `rule'
		local hi : word 2 of `rule'
		local w  : word 3 of `rule'
		replace p_ae = `w'*p_child if inrange(p_agey,`lo',`hi')
	}

	* Ages 11-17: sex-specific weights (age from, age to, p_female value, weight)
	foreach rule in "11 15 0 0.91" "16 17 0 1.07" "11 15 1 0.88" "16 17 1 0.83" {
		local lo  : word 1 of `rule'
		local hi  : word 2 of `rule'
		local fem : word 3 of `rule'
		local w   : word 4 of `rule'
		replace p_ae = `w'*p_child if inrange(p_agey,`lo',`hi') & p_female==`fem'
	}

	* Non-children contribute zero
	replace p_ae = 0 if p_child==0

	bys hhid: egen hh_nae = sum(p_ae), missing

	** CHECKS REPORTING OF AGE, GENDER AND RELATIONSHIP TO HH HEAD:
	* Household-level flags for: no head reported, any missing age, any
	* missing gender. The flags are only created here.

	* HH head not reported: household max of p_head is 0 or missing
	cap drop aux
	bys hhid: egen aux = max(p_head)
	gen nohead = inlist(aux, 0, .)
	drop aux

	* Any missing in age / any missing in gender
	foreach chk in "agemiss p_agey" "gendermiss p_female" {
		local flag : word 1 of `chk'
		local src  : word 2 of `chk'
		gen aux = `src'>=.
		bys hhid: egen `flag' = max(aux)
		drop aux
	}

	* Drop all person-level variables and collapse to distinct rows
	drop pno p_* dataset_pno*
	duplicates drop

	tempfile hhdemvars
	save `hhdemvars'


	********************************************************************************
	* Household-level characteristics
	********************************************************************************
	* Urban dummy: 1 when reside equals 1, 0 otherwise, missing when reside is missing
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_a_filt_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining

	gen hh_urban = (reside==1) if !missing(reside)
	keep dataset_hhid* hh_urban
	duplicates drop
	tempfile hhvars1
	save `hhvars1'

	* House ownership: 1 when hh_f01 is 1 or 2, missing when hh_f01 is missing
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_f_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining

	gen hh_homeown = inlist(hh_f01,1,2) if !missing(hh_f01)
	keep dataset_hhid* hh_homeown
	duplicates drop
	tempfile hhvars2
	save `hhvars2'

	* Land ownership: 1 when either source variable equals 1
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_a_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining

	local k = 0
	foreach src in ag_b101 ag_i101b {
		local ++k
		gen hh_landown`k' = (`src'==1) if !missing(`src')
	}
	egen hh_landown = rowmax(hh_landown1 hh_landown2)
	drop hh_landown1 hh_landown2
	keep dataset_hhid* hh_landown
	duplicates drop
	tempfile hhvars3
	save `hhvars3'
				
	* Household-level incomes
	** agricultural income
	** revenues: row total of two components, summed per household
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_r1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	foreach var of varlist ag_r17 ag_r16 ag_r19 {
		* special codes listed in $missvals_inlist are treated as missing
		replace `var'=. if inlist(`var', ${missvals_inlist})
	}
	gen aux1=ag_r17$month12
	gen aux2=(ag_r17/ag_r16)*ag_r19$month12
	egen aux = rowtotal(aux1 aux2), missing
	bys dataset_hhid1 : egen hh_hhagrev = sum(aux), missing
	keep dataset_* hh_hhagrev
	duplicates drop 
	tempfile hh_hhagrev
	save `hh_hhagrev'

	** costs: five cost items, each cleaned and scaled, then totalled per household
	use  "$origdata/MWI_IHPS_2016/1.data/ag_mod_r2_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining

	local i=1
	foreach var of varlist ag_r27 ag_r28 ag_r29 ag_r25 ag_r26 {
		replace `var'=. if inlist(`var', ${missvals_inlist})
		gen aux`i' = `var'$month12
		local ++i
	}
	egen aux = rowtotal(aux1 aux2 aux3 aux4 aux5), missing
	bys dataset_hhid1 : egen hh_hhagcost = sum(aux), missing
	keep dataset_* hh_hhagcost
	duplicates drop 
	tempfile hh_hhagcost
	save `hh_hhagcost'


	* Latitude longitude: copies of lat_modified / lon_modified, keyed on dataset_hhid1
	use  "$origdata/MWI_IHPS_2016/1.data/HouseholdGeovariablesIHPSY3.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	foreach coord in lat lon {
		gen `coord' = `coord'_modified
	}
	keep dataset_hhid* lat lon
	duplicates drop 
	tempfile gps
	save `gps' 


	********************************************************************************
	* Consumption expenditure
	********************************************************************************

	* Clothing
	**************
	* Household clothing expenditure for men (m), women (f) and children (c),
	* selected by item code in module J.
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_j_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_j02
	replace hh_j03=. if inlist(hh_j03, ${missvals_inlist})

	* Item codes per group
	local codes_m 308, 309, 310, 311, 312, 323
	local codes_f 317, 318, 319, 320, 321, 325
	local codes_c 301, 302, 303, 304, 305, 306, 307, 313, 314, 315, 316, 322, 324

	foreach g in m f c {
		* scaled amount for this group's items, passed through winsor2, summed per household
		cap drop aux
		gen aux = hh_j03$month3 if inlist(prcodevar, `codes_`g'')
		winsor2 aux, replace cuts($expoutcutoff)	
		bys dataset_hhid1 : egen hh_cloth_`g' = sum(aux)
	}

	keep dataset_* hh_cloth*
	duplicates drop

	tempfile hh_cloth
	save `hh_cloth'



	* Food
	*********
	** food price: unit value (hh_g05/hh_g04a) per household, item code and unit
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_g1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_g02
	gen unitvar = hh_g04b 
	foreach var of varlist hh_g05 hh_g04a {
		replace `var'=. if inlist(`var', ${missvals_inlist})
	}
	gen aux = hh_g05/hh_g04a
	*** median price per item code and unit per HH, if one item was recorded more than once
	bys dataset_hhid1 prcodevar unitvar: egen price= median(aux)
	keep dataset_hhid1 prcodevar unitvar price
	duplicates drop

	*** replace zero/missing prices with regional medians, going from region5 down to region1;
	*** a level is skipped when its variable is not in the data
	merge m:1 dataset_hhid1 using `hhidvars', nogen
	forvalues i = 5(-1)1 {
		cap confirm var region`i' 
		if !_rc {
			cap drop price_p50
			bys region`i' prcodevar  unitvar :  egen price_p50 = median(price)
			replace price=price_p50 if (price==0 | price>=.) & price_p50<.
			drop price_p50
		}
	}			

	keep dataset_hhid1 prcodevar unitvar price 
	duplicates drop
	tempfile foodp
	save `foodp'

	** food consumption: quantity (hh_g03a) valued at the prices saved in `foodp'
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_g1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_g02
	gen unitvar = hh_g03b
	replace hh_g03a=. if inlist(hh_g03a, ${missvals_inlist})
	merge m:1 dataset_hhid1 prcodevar unitvar using `foodp', nogen

	* value per row, passed through winsor2, then scaled with $week1
	cap drop aux
	gen aux = price*hh_g03a
	winsor2 aux, replace cuts($expoutcutoff) 
	replace aux = aux$week1

	* household total
	bys dataset_hhid1 : egen hh_food= sum(aux)
	keep dataset_* hh_food
	duplicates drop
	tempfile hh_food
	save `hh_food'

	* Non-food public 
	********************
	* Part 1: module I1, item codes 101 102 104 105, scaled with $week1
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_i1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_i02
	replace hh_i03=. if inlist(hh_i03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_i03 if inlist(prcodevar, 101, 102, 104, 105)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$week1

	* household total
	bys dataset_hhid1 : egen hh_nfoodpub1= sum(aux)
	keep dataset_* hh_nfoodpub1
	duplicates drop
	tempfile hh_nfoodpub1
	save `hh_nfoodpub1'


	* Non-food public, part 2: module I2, item codes 201 209 215 216 217 219, scaled with $month1
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_i2_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_i05
	replace hh_i06=. if inlist(hh_i06, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_i06 if inlist(prcodevar, 201, 209, 215, 216, 217, 219)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month1

	* household total
	bys dataset_hhid1 : egen hh_nfoodpub2= sum(aux)
	keep dataset_* hh_nfoodpub2
	duplicates drop
	tempfile hh_nfoodpub2
	save `hh_nfoodpub2'


	* Non-food public, part 3: module J, item codes 328 329 330 331 333 338, scaled with $month3
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_j_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_j02
	replace hh_j03=. if inlist(hh_j03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_j03 if inlist(prcodevar, 328, 329, 330, 331, 333, 338)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month3

	* household total
	bys dataset_hhid1 : egen hh_nfoodpub3= sum(aux)
	keep dataset_* hh_nfoodpub3
	duplicates drop
	tempfile hh_nfoodpub3
	save `hh_nfoodpub3'


	* Non-food public, part 4: module K1, item codes 402 403 404 408 409 410, scaled with $month12
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_k1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_k03 if inlist(prcodevar, 402, 403, 404, 408, 409, 410)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12

	* household total
	bys dataset_hhid1 : egen hh_nfoodpub4= sum(aux)
	keep dataset_* hh_nfoodpub4
	duplicates drop
	tempfile hh_nfoodpub4
	save `hh_nfoodpub4'

	* Non-food public, part 5: module K2, item codes 419 420, scaled with $month12
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_k2_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_k03 if inlist(prcodevar, 419, 420)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12

	* household total
	bys dataset_hhid1 : egen hh_nfoodpub5= sum(aux)
	keep dataset_* hh_nfoodpub5
	duplicates drop
	tempfile hh_nfoodpub5
	save `hh_nfoodpub5'


	* Non-food private 
	********************
	* Part 1: module I1, item codes 103 106 107 108 109, scaled with $week1
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_i1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_i02
	replace hh_i03=. if inlist(hh_i03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_i03 if inlist(prcodevar, 103, 106, 107, 108, 109)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$week1

	* household total
	bys dataset_hhid1 : egen hh_nfoodpriv1= sum(aux)
	keep dataset_* hh_nfoodpriv1
	duplicates drop
	tempfile hh_nfoodpriv1
	save `hh_nfoodpriv1'


	* Non-food private, part 2: module I2, selected item codes, scaled with $month1
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_i2_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_i05
	replace hh_i06=. if inlist(hh_i06, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_i06 if inlist(prcodevar, 202, 203, 204, 205, 206, 207, 210, 211, 212, 213, 214, 218, 220, 221)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month1

	* household total
	bys dataset_hhid1 : egen hh_nfoodpriv2= sum(aux)
	keep dataset_* hh_nfoodpriv2
	duplicates drop
	tempfile hh_nfoodpriv2
	save `hh_nfoodpriv2'


	* Non-food private, part 3: module J, selected item codes, scaled with $month3
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_j_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_j02
	foreach var in hh_j03  {
		* special codes listed in $missvals_inlist are treated as missing
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}

	* Flag the item codes that count as private expenditure.
	* Fix: the original list read "... 315 16 317 ...", so item 316 was never
	* selected (16 was a typo for 316). The numlist below covers 301-327 in full.
	cap drop aux
	cap drop touse
	gen touse=0
	foreach x of numlist 301/327 332 334/337 339 {
		replace touse = 1 if prcodevar==`x'
	} 

	* amount on the selected items only; winsor2 is applied before scaling
	local i=1
	foreach var in hh_j03 {
		cap drop aux`i'
		gen aux`i'=`var'  if touse==1
		winsor2 aux`i', replace cuts($expoutcutoff) //trim
		local i=`i'+1
	}
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month3

	* household total
	bys dataset_hhid1 : egen hh_nfoodpriv3= sum(aux)
	keep dataset_* hh_nfoodpriv3
	duplicates drop
	tempfile hh_nfoodpriv3
	save `hh_nfoodpriv3'


	* Non-food private, part 4: module K1, item codes 411 412 414, scaled with $month12
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_k1_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	gen prcodevar = hh_k02
	replace hh_k03=. if inlist(hh_k03, ${missvals_inlist})

	* amount on the selected item codes only; winsor2 is applied before scaling
	cap drop aux
	cap drop aux1
	gen aux1 = hh_k03 if inlist(prcodevar, 411, 412, 414)
	winsor2 aux1, replace cuts($expoutcutoff) //trim
	egen aux = rowtotal(aux?), missing
	replace aux =aux$month12

	* household total
	bys dataset_hhid1 : egen hh_nfoodpriv4= sum(aux)
	keep dataset_* hh_nfoodpriv4
	duplicates drop
	tempfile hh_nfoodpriv4
	save `hh_nfoodpriv4'


	** health and education
	* Education: hh_c22j from module C, passed through winsor2, scaled with $month12, summed per household
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_c_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	replace hh_c22j=. if inlist(hh_c22j, ${missvals_inlist})
	gen aux = hh_c22j
	winsor2 aux, replace cuts($expoutcutoff) //trim
	replace aux = aux$month12
	bys dataset_hhid1 : egen hh_educexp= sum(aux)
	keep dataset_* hh_educexp
	duplicates drop
	tempfile hh_educexp
	save `hh_educexp'

	* Health: hh_d10, hh_d11, hh_d12 from module D, each passed through winsor2 and scaled with $week4
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_d_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining
	local i=1
	foreach var of varlist hh_d10 hh_d11 hh_d12 {
		replace `var'=. if inlist(`var', ${missvals_inlist})
		cap drop aux`i'
		gen aux`i'=`var'
		winsor2 aux`i', replace cuts($expoutcutoff) //trim
		replace aux`i' = aux`i'$week4
		local ++i
	}
	egen aux = rowtotal(aux1 aux2 aux3), missing
	bys dataset_hhid1 : egen hh_healthexp= sum(aux)
	keep dataset_* hh_healthexp
	duplicates drop
	tempfile hh_healthexp
	save `hh_healthexp'


	* Rent and utilities
	**********************
	* All variables come from module F. Amounts are cleaned of the special missing
	* codes, passed through winsor2, and scaled with the recall-period macros.
	* Where a time-unit variable exists, the scaling depends on its code
	* (3 -> $day1, 4 -> $week1, 5 -> $month1, 6 -> $year1).

	** any rent dummy
	use  "$origdata/MWI_IHPS_2016/1.data/hh_mod_f_16.dta", clear
	gen dataset_hhid1 = y3_hhid //for combining

	gen hh_anyrent = hh_f01==6 if hh_f01<.

	** rent amount: row total of the two rent variables (hh_f03a, hh_f04a)
	foreach var in hh_f03a hh_f04a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_rent1 = hh_f03a
	winsor2 hh_rent1, replace cuts($expoutcutoff) //trim
	replace hh_rent1=hh_rent1$day1 if  hh_f03b==3
	replace hh_rent1=hh_rent1$week1 if  hh_f03b==4
	replace hh_rent1=hh_rent1$month1 if  hh_f03b==5
	replace hh_rent1=hh_rent1$year1 if  hh_f03b==6
	gen hh_rent2 = hh_f04a
	winsor2 hh_rent2, replace cuts($expoutcutoff) //trim
	replace hh_rent2=hh_rent2$day1 if  hh_f04b==3
	replace hh_rent2=hh_rent2$week1 if  hh_f04b==4
	replace hh_rent2=hh_rent2$month1 if  hh_f04b==5
	replace hh_rent2=hh_rent2$year1 if  hh_f04b==6
	egen hh_rent = rowtotal(hh_rent?), missing
	drop hh_rent?

	** water
	foreach var in hh_f37 {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_water = hh_f37
	winsor2 hh_water, replace cuts($expoutcutoff) //trim
	replace hh_water = hh_water$month1


	** electricity
	* Fix: the original divided the payment by hh_f26b, which is the time-UNIT
	* code (it is the variable tested against 3/4/5/6 below), not the number of
	* periods. The amount is now divided by hh_f26a, mirroring the public-phone
	* calculation further down (hh_f32/hh_f33a with unit hh_f33b).
	* NOTE(review): assumes hh_f26a holds the number of time units covered by
	* the payment - confirm against the questionnaire / data dictionary.
	foreach var in hh_f25 hh_f26a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_electr = hh_f25/hh_f26a
	winsor2 hh_electr, replace cuts($expoutcutoff) //trim
	replace hh_electr = hh_electr$day1 if hh_f26b==3
	replace hh_electr = hh_electr$week1 if hh_f26b==4
	replace hh_electr = hh_electr$month1 if hh_f26b==5
	replace hh_electr = hh_electr$year1 if hh_f26b==6

	** other energy
	foreach var in hh_f18 {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_oenerg = hh_f18
	winsor2 hh_oenerg, replace cuts($expoutcutoff) //trim
	replace hh_oenerg = hh_oenerg$week1


	** phone costs - public: amount per period (hh_f32/hh_f33a), scaled by unit hh_f33b
	foreach var in hh_f32 hh_f33a {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_phonepub = hh_f32/hh_f33a
	winsor2 hh_phonepub, replace cuts($expoutcutoff) //trim
	replace hh_phonepub = hh_phonepub$day1 if hh_f33b==3
	replace hh_phonepub = hh_phonepub$week1 if hh_f33b==4
	replace hh_phonepub = hh_phonepub$month1 if hh_f33b==5
	replace hh_phonepub = hh_phonepub$year1 if hh_f33b==6

	** phone costs - private
	foreach var in hh_f35 {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	gen hh_phonepriv = hh_f35
	winsor2 hh_phonepriv, replace cuts($expoutcutoff) //trim
	replace hh_phonepriv = hh_phonepriv$month1

	keep dataset_* hh_anyrent hh_rent hh_water hh_electr hh_oenerg hh_phone*
	duplicates drop 
	tempfile hh_rentutils
	save `hh_rentutils'



	********************************************************************************
	* Combine all survey-based variables
	********************************************************************************
	* Start from the household ID file and keep only households that also have
	* demographic variables.
	use `hhidvars', clear
	merge 1:1 dataset_hhid1 using `hhdemvars', keep(match) nogen

	* Attach every other household-level tempfile; rows found only in the
	* attached file are not kept.
	local pieces weight hhvars1 hhvars2 hhvars3 hh_hhagrev hh_hhagcost gps hh_cloth ///
				 hh_food hh_nfoodpub1 hh_nfoodpub2 hh_nfoodpub3 hh_nfoodpub4 hh_nfoodpub5 ///
				 hh_nfoodpriv1 hh_nfoodpriv2 hh_nfoodpriv3 hh_nfoodpriv4 hh_educexp ///
				 hh_healthexp hh_rentutils
	foreach x of local pieces {
		merge 1:1 dataset_hhid1  using ``x'', keep(master match) nogen
	}


	* Income from agriculture: revenue minus cost (when cost is observed), floored at zero
	gen hh_hhaginc = hh_hhagrev
	replace hh_hhaginc = hh_hhaginc-hh_hhagcost if !missing(hh_hhagcost)
	replace hh_hhaginc=0 if hh_hhaginc<0

	* Total HH income 
	* NOTE(review): hh_hhaginc and hh_hhinc are dropped right after being built,
	* so hh_totinc contains only hh_inc - confirm this is intended.
	egen hh_hhinc = rowtotal(hh_hhaginc), missing //non-individualized
	egen hh_totinc = rowtotal(hh_inc), missing
	drop hh_hhagcost hh_hhagrev hh_hhaginc hh_hhinc

	* Relative income of women
	gen hh_relinc_f = hh_inc_f/hh_totinc

	* Private expenditure (hh_food is kept afterwards; the other components are dropped)
	local privparts hh_nfoodpriv? hh_educexp hh_healthexp hh_phonepriv
	egen hh_privexp = rowtotal(hh_food `privparts')
	drop `privparts'

	* Public expenditure
	local pubparts hh_nfoodpub? hh_rent hh_water hh_electr hh_oenerg hh_phonepub
	egen hh_pubexp = rowtotal(`pubparts')
	drop `pubparts'

	* Total expenditure
	egen hh_exp = rowtotal(hh_privexp hh_pubexp), missing

	********************************************************************************
	* Corrections
	********************************************************************************
	* Missing household statistics are set to zero when the group they describe
	* has no members. Pairs below read "suffix count_variable".

	* Employment
	* if no adults, women or men, the share employed / self-employed / working is zero
	foreach x in pworking pemployee pselfempl {
		foreach pair in "a nadult" "f nfemale" "m nmale" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'_`sfx'=0 if hh_`cnt'==0 & hh_`x'_`sfx'>=.
		}
	}

	* Demographics:
	* - Age is zero if no adults, women, men, children, boys or girls
	foreach x in mean max min {
		foreach pair in "a nadult" "f nfemale" "m nmale" "c nchild" "b nboy" "g ngirl" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'age_`sfx'=0 if hh_`cnt'==0 & hh_`x'age_`sfx'>=.
		}
	}

	* Share of boys and women
	replace hh_pboy = 0 if hh_nchild==0 & hh_pboy>=.
	replace hh_pfemale = 0 if hh_nadult==0 & hh_pfemale>=.

	* Education
	* - education shares are zero if no women / no men
	foreach x in pnoeduc peductert {
		foreach pair in "f nfemale" "m nmale" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'_`sfx'=0 if hh_`cnt'==0 & hh_`x'_`sfx'>=.
		}
	}

	* Income variables:
	* - If no working adults, then total income is zero
	replace hh_totinc=0 if hh_pworking_a==0 & hh_totinc>=.

	* - Women's income and relative income are zero if no woman works,
	*   there are no women, or total income is zero
	foreach v in hh_inc_f hh_relinc_f {
		replace `v' = 0 if (hh_pworking_f==0 | hh_nfemale==0 | hh_totinc==0) & `v'>=.
	}

	* Clothing expenditures (if only purchased items were recorded)
	foreach x of varlist hh_cloth_f hh_cloth_m hh_cloth_c {
		replace `x' = 0 if missing(`x')
	}		

	gen sample = "IHPS"
	
	tempfile mwi_ihps
	save `mwi_ihps'

}


*------------------------------------------------------------------------------*
*
* 							Cultural data 
*
*------------------------------------------------------------------------------*

* Stack the two household samples
use `mwi_ihs', clear
append using `mwi_ihps'

*************************************************************************
* Individual matching 
*************************************************************************

* Prepare the Atlas dataset
preserve
	use "$cultdata/ethnographic_atlas_fixed.dta", clear  // Dataset from Ashraf_Bau_Nunn_Voena_JPE_2020_Replication_Files
	* the row number serves as the society identifier used for matching
	gen ethn_match=_n

	** generate cultural variables, based on Alesina et al 2021	
	* residence indicators from v11; rows with v11 equal to 0 or 9 are left missing
	foreach spec in "patrilocal 1" "neolocal 2" "matrilocal 3" {
		local trait : word 1 of `spec'
		local code  : word 2 of `spec'
		gen `trait' = (v11==`code') if !inlist(v11,0,9)
	}
	* descent indicators from v43; rows with v43 equal to 0 are left missing
	gen matrilineal = (v43==3) if v43!=0
	gen patrilineal = (v43==1) if v43!=0
	*g matrilineal_h=v74==2 | v74==3 if v74!=0
	*g patrilineal_h=v74==6 | v74==7  if v74!=0

	rename v104 lat 
	rename v106 lon
	global eth_vars "patrilocal neolocal matrilocal matrilineal patrilineal"
	keep ethn_match v107 $eth_vars //lat lon 
	tempfile atlas
	save `atlas'
restore


* Map the household head's language to an Atlas society (row number in the Atlas file)
gen ethn_match = .
replace ethn_match = 233  if hh_language_h=="chewa"
replace ethn_match = 951  if hh_language_h=="lambya"
replace ethn_match = 638  if hh_language_h=="lomwe"
replace ethn_match = 821  if hh_language_h=="ngoni"
replace ethn_match = 842  if inlist(hh_language_h,"nkhonde","nyakyusa","sukwa")
replace ethn_match = 845  if inlist(hh_language_h,"nyanja","sena")
replace ethn_match = 1152 if inlist(hh_language_h,"senga","tumbuka")
replace ethn_match = 614  if hh_language_h=="tonga"
replace ethn_match = 1234 if hh_language_h=="yao"

* Attach the cultural traits; households without a match keep missing traits
merge m:1 ethn_match using `atlas', keep(match master) nogen
gen miss_eth_indiv = (v107=="")

	  
*************************************************************************
* Geo matching 
*************************************************************************
/*
1. grid cell level information of population density in 2019 from LandScan 
2. Aggregate population by subnational region (higher level) and ethnic group (Giuliano and Nunn, 2018 shapefile)
We obtain a dataset with the current population of each region that lives in areas that were occupied by each ethnic group identified by its language (example BOL.cvs)
3. Merge language with ethnic group based on Giuliano and Nunn, 2018
4. Merge ethnic group with Atlas information
5. Average value of ethnographic traits at the regional level, weighted by the current population living in the area.
Obtains a dataset with the population-weighted average of the cultural traits for each region to merge with the expenditure survey (example: regions_BOL.dta).
*/

preserve
	* Prepare the shape file for the geographical match: ethnographic groups that lived in the geographical region of the household. 
	// shape file of global location of ethnographic groups in the Atlas, provided by Giuliano and Nunn (2018)
	tempfile map_data map_coordinates
	shp2dta using "$cultdata/Ethnologue_16_shapefile/langa_no_overlap_biggest_clean", data(`map_data')  coor(`map_coordinates') genid(_ID) replace 

	* Link each polygon to its Atlas society name (v107) through the language id. 
	// The correspondence between language and ethnic group is provided by Giuliano and Nunn (2018)
	use `map_data', clear
	rename ID id
	merge m:1 id using "$cultdata/EthnoAtlas_Ethnologue16_baseline_by_language.dta", keep(master match) nogen keepusing(v107)
	rename v107 v107geo
	tempfile map_data2
	save `map_data2'
	
	* Copy of the Atlas traits with a _gps suffix, keyed on the society name
	use `atlas', clear
	foreach trait of global eth_vars {
		rename `trait' `trait'_gps
	}
	rename v107 v107geo
	tempfile atlas2
	save `atlas2'
restore

* Combine with household survey data: locate each household in a polygon,
* then bring in the society name and its traits
geoinpoly lat lon using `map_coordinates', noproj
merge m:1 _ID using `map_data2', keep(master match) nogen keepusing(v107geo)    
merge m:1 v107geo using `atlas2', keep(match master) nogen
drop _ID
gen miss_eth_geo = (patrilocal_gps==.)

*************************************************************************
* Rename/labeling
*************************************************************************

* Rename
*************
** remove prefix "hh_" 
rename hh_* *

** demographics: counts and the share of women
rename (nfemale nmale pfemale) (nwomen nmen femaleratio)

** age statistics: mean -> av, and children suffix _c -> _k
rename (meanage_a meanage_c meanage_f meanage_m meanage_g meanage_b) ///
	   (avage_a   avage_k   avage_f   avage_m   avage_g   avage_b)
rename (minage_c maxage_c) (minage_k maxage_k)

* Labels
**********
* Every label command is prefixed with cap, so variables absent from the data are skipped.

** identifiers, location, weights
cap label variable sample "Survey sample" 
cap label variable hhno "HH no within cluster" 
cap label variable hhid "Unique HH ID"
cap label variable region2 "Region ID"
cap label variable region2_name "Region name" 
cap label variable region3 "District ID"
cap label variable region3_name "District name" 
cap label variable region4 "TA code"
cap label variable region5 "EA code" 
cap label variable sweight "Sampling weight" 

** household composition
cap label variable size "HH size"
cap label variable nadult "Number of adults"
cap label variable nchild "Number of children" 
cap label variable nadultchild "Number of adult children (aged below 18)" 
cap label variable nae "Number of children, adult equivalent FAO"
cap label variable nmen "Number of men" 
cap label variable nwomen "Number of women" 
cap label variable nboy "Number of boys"
cap label variable ngirl "Number of girls"
cap label variable pboy "Proportion of boys"
cap label variable femaleratio "Proportion of women" 

** age statistics by group: "<Average|Maximum|Minimum> age of <group>"
foreach stat in "avage Average" "maxage Maximum" "minage Minimum" {
	local stub : word 1 of `stat'
	local adj  : word 2 of `stat'
	foreach grp in "a adults" "f women" "m men" "k kids" "g girls" "b boys" {
		local sfx : word 1 of `grp'
		local who : word 2 of `grp'
		cap label variable `stub'_`sfx' "`adj' age of `who'"
	}
}
cap label variable minage "Age of the youngest HH members"
cap label variable childage "Child age threshold" 

** education shares by sex
foreach grp in "f Women women" "m Men men" {
	local sfx   : word 1 of `grp'
	local Who   : word 2 of `grp'
	local who   : word 3 of `grp'
	cap label variable pnoeduc_`sfx' "`Who' with no education (as % of `who' in HH)"
	cap label variable peductert_`sfx' "`Who' with tert. education (as % of `who' in HH)"
}

** employment shares by group
foreach stat in "pemployee Employed" "pselfempl Self-employed" "pworking Working" {
	local stub : word 1 of `stat'
	local adj  : word 2 of `stat'
	foreach grp in "a adults" "f women" "m men" {
		local sfx : word 1 of `grp'
		local who : word 2 of `grp'
		cap label variable `stub'_`sfx' "`adj' `who' (as % of `who' in HH)"
	}
}

** household head
cap label variable agey_h "Age of HH head"
cap label variable age40_h "HH head is older than 40 (=1)"
cap label variable female_h "HH head is female (=1)"
cap label variable noeduc_h "HH head has no education (=1)"
cap label variable eductert_h "HH head has tertiary education (=1)"
cap label variable ethn_h "HH head's ethnic group"
cap label variable married_h "HH head is married (=1)"
cap label variable cohab_h "HH head is cohabiting with partner (=1)"
cap label variable divorced_h "HH head is divorced (=1)"
cap label variable separated_h "HH head is separated (=1)"
cap label variable widow_h "HH head is widow (=1)"
cap label variable single_h "HH head is single (=1)"
cap label variable language_h "HH head's language spoken"

** wages and incomes
cap label variable wage "Sum of indivudal wages, annual (local currency)"
cap label variable wage_f "Sum of women's indivudal wages, annual (local currency)"
cap label variable wage_m "Sum of men's indivudal wages, annual (local currency)"
cap label variable inc "Sum of individual incomes, annual (local currency)"
cap label variable inc_f "Sum of women's individual incomes, annual (local currency)"
cap label variable inc_m "Sum of men's individual incomes, annual (local currency)"
cap label variable totinc "Total HH income, annual (local currency)"
cap label variable relinc_f "Relative income of women"

** reporting checks
cap label variable nohead "No HH head reported (=1)"
cap label variable agemiss "Age not reported at least for one HH member (=1)"
cap label variable gendermiss "Gender not reported at least for one HH member (=1)"

** dwelling and land
cap label variable urban "Urban (=1)"
cap label variable homeown "HH owns house (=1)"
cap label variable landown "HH owns agricultural land (=1)"
cap label variable anyrent "HH pays rent (=1)"

** expenditure
cap label variable cloth_m "Annual HH expenditure on men's clothing (local currency)"
cap label variable cloth_f "Annual HH expenditure on women's clothing (local currency)"
cap label variable cloth_c "Annual HH expenditure on children's clothing (local currency)"
cap label variable food "Annual HH food expenditure (local currency)"
cap label variable pubexp "Annual HH public expenditure (local currency)"
cap label variable privexp "Annual HH private expenditure (local currency)"
cap label variable exp "Annual HH expenditure (local currency)"
cap label variable rexp_wb "Annual HH expenditure, WB estimate (local currency)"

** cultural traits: the individual-match and geo-match (_gps) versions share a label
cap label variable ethn_match "Ethnic group no."
cap label variable v107 "Society name" 
foreach pair in "patrilocal Patrilocal" "neolocal Neolocal" "matrilocal Matrilocal" "matrilineal Matrilineal" "patrilineal Patrilineal" {
	local trait : word 1 of `pair'
	local Trait : word 2 of `pair'
	cap label variable `trait' "`Trait' (=1)"
	cap label variable `trait'_gps "`Trait' (=1)"
}
cap label variable miss_eth_indiv "Missing ethno data, individual matching" 
cap label variable miss_eth_geo "Missing ethno data, geo matching" 

** coordinates
cap label variable lat "Latitude"
cap label variable lon "Longitude"

* NOTE(review): $replication is not among the directory globals defined at the
* top of this file - confirm it is set before this line.
save "$replication/MWI.dta", replace
	
*************************************************************************
*  Individual-level data on women's control over earnings
*************************************************************************
* Builds a person-level file of married, working women aged 17 to `maxage'
* with an indicator for whether they are listed as deciding over earnings.
local maxage=67

* Person-level modules E and B, plus the household's region
use "$origdata/MWI_IHS_2016/1.data/HH_MOD_E.dta", clear
merge 1:1 case_id PID using "$origdata/MWI_IHS_2016/1.data/HH_MOD_B.dta", nogen
merge m:1 case_id using "$origdata/MWI_IHS_2016/1.data/HH_MOD_A_FILT.dta" , keepusing(region ) nogen

* Household language code: maximum of hh_b22 within the household
egen language_h=max(hh_b22), by(case_id)

* Map the language code to an Atlas society (row number in the Atlas file)
gen ethn_match = .
replace ethn_match = 233  if language_h==1
replace ethn_match = 951  if language_h==11
replace ethn_match = 638  if language_h==5
replace ethn_match = 821  if language_h==7
replace ethn_match = 842  if inlist(language_h,6,9,13)
replace ethn_match = 845  if inlist(language_h,2,8)
replace ethn_match = 1152 if inlist(language_h,12,4)
replace ethn_match = 614  if language_h==10
replace ethn_match = 1234 if language_h==3
	
merge m:1 ethn_match using `atlas', keep(match master) keepusing(v107 patrilocal patrilineal) 
ta patrilocal hh_b24_1

* Person characteristics
rename hh_b05a age
rename hh_b03 sex
gen female = (sex==2)
gen married = (hh_b24==1)
* work: at least one of hh_e26_1a / hh_e26_1b is filled in
gen work = (hh_e26_1a!=. | hh_e26_1b!=.)  
* decides: the person's own PID appears in hh_e26_1a or hh_e26_1b
gen decides = (PID==hh_e26_1a | PID==hh_e26_1b) if work==1

* Sample restriction
keep if female==1 & married==1 & inrange(age,17,`maxage') & work==1

keep case_id HHID PID age patrilocal decides region 

cap label variable age "Age" 
cap label variable patrilocal "Patrilocal (=1)"
cap label variable region "Region"
cap label variable decides "Control over own earnings"

* NOTE(review): $replication is not among the directory globals defined at the
* top of this file - confirm it is set before this line.
save "$replication/MWI_women.dta", replace










